master

分支 (1)

管理

管理

master

downloadFile
/
downloadFile.py

#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import json
import os
import shutil
import urllib
import urllib2
import zipfile
import os.path
import requests
import re
import openpyxl
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
import ConfigParser
import sys
import msvcrt
import base64
# Python 2 only: reloading sys makes setdefaultencoding callable again so that
# UTF-8 becomes the implicit codec for str <-> unicode conversions.
reload(sys)
sys.setdefaultencoding('utf-8')
# Parse config.ini from the current working directory; the values are read
# by the script section at the bottom of this file.
cf = ConfigParser.ConfigParser()
cf.read('config.ini')
# dhost = 'localhost:4030'
# Host name (no scheme) that every request URL in this file is built from.
dhost = '2d.hep.com.cn'
# poster helper -- presumably installs its streaming handlers into urllib2;
# TODO confirm it is still needed, nothing visible here uses multipart_encode.
register_openers()
# Placeholder; replaced by the value returned from login() at the bottom.
token = ''


def login():
    """Authenticate against the server and return the access token.

    Posts the module-level ``email`` and ``password`` as a form-encoded body
    to ``http://<dhost>/auth/local`` and reads ``data.token`` from the JSON
    reply.

    Returns:
        The token found in the reply, or None when the request fails, the
        reply is not a JSON object, or it carries no ``data.token``.
    """
    credentials = urllib.urlencode({
        "email": email,
        "password": password
    })
    request = urllib2.Request(
        'http://' + dhost + '/auth/local', data=credentials)
    try:
        response = urllib2.urlopen(request)
        try:
            result = json.loads(response.read())
        finally:
            response.close()
    except (urllib2.URLError, ValueError):
        # HTTPError (e.g. rejected credentials) subclasses URLError and
        # ValueError covers a non-JSON body. Callers treat None as a failed
        # login and report it, so do not let these escape as a traceback.
        return None
    if not isinstance(result, dict):
        return None
    data = result.get('data')
    if not isinstance(data, dict):
        return None
    return data.get('token')


def getType(typeStr):
    """Extract the file extension from a tagged type label.

    ``typeStr`` must start with ``<...>``, followed by a run of
    non-whitespace characters, followed by another ``<...>``. When that run
    begins with the two characters u'文档', everything after them is returned.

    Args:
        typeStr: The label string to inspect.

    Returns:
        The text following u'文档', or None when the pattern does not match
        or the captured text has a different prefix.
    """
    inner = r"?P<f>\S+"
    pattern = re.compile(r"<.*>(%s)<.*>" % (inner), re.VERBOSE)
    found = pattern.match(typeStr)
    if found is None:
        return None
    label = found.groups()[0].decode('utf8')
    if label[0:2] != u'文档':
        return None
    return label[2:]

def doExcel(path):
    """Look up every book listed in the workbook and download its resources.

    Reads the first column of the sheet named ``Sheet1``. Each cell is split
    on '/': the first part is sent as the ``wuliao`` query parameter; when it
    is empty the second part is sent as ``wbs`` instead. The server's
    ``/api/v1/books/guess`` endpoint is queried and, when it returns any
    rows, download_need_file is called with the first row's ``_id`` and
    ``title``.

    Rows with an empty cell, or with neither value present, are skipped.

    Args:
        path: Path of the workbook to open with openpyxl.
    """
    wb = openpyxl.load_workbook(path)
    # Only the sheet named 'Sheet1' is read.
    sheet = wb.get_sheet_by_name('Sheet1')
    headers = {'authorization': 'Bearer ' + token}
    for row in sheet.rows:
        if not row:
            continue
        cell_value = row[0].value
        if cell_value is None:
            # str(None) would otherwise be sent to the server as 'None'.
            continue
        parts = str(cell_value).split('/')
        wuliao = parts[0]
        # A cell without a slash has no second part.
        wbs = parts[1] if len(parts) > 1 else ''
        if wuliao != '':
            query = 'wuliao=' + urllib.quote(wuliao)
        elif wbs != '':
            query = 'wbs=' + urllib.quote(wbs)
        else:
            # Nothing to search for; never reuse the previous row's URL.
            continue
        url = 'http://' + dhost + '/api/v1/books/guess?' + query
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            result = json.loads(response.read())
        finally:
            response.close()
        matches = result['rows']
        if len(matches) > 0:
            book = matches[0]
            download_need_file(book['_id'], book['title'])


def _claim_target(folder, filepath, created_msg):
    """Report whether filepath still has to be written, creating folder if needed.

    Prints the skip message and returns False when filepath already exists.
    Otherwise creates folder when it is missing (printing created_msg
    followed by the folder path), prints the download message and returns
    True.
    """
    if os.path.exists(filepath):
        print(u'文件已存在，跳过... ' + filepath)
        return False
    if not os.path.exists(folder):
        os.makedirs(folder + '/')
        print(created_msg + folder + '/')
    print(u'正在下载... ' + filepath)
    return True


def _write_file(filepath, chunks):
    """Write every non-empty chunk to filepath and close the file.

    Data is written to '<filepath>.part' and renamed once complete, so an
    interrupted run does not leave a truncated file that the existence check
    would skip next time.
    """
    partial = filepath + '.part'
    with open(partial, "wb") as f:
        for chunk in chunks:
            if chunk:
                f.write(chunk)
    os.rename(partial, filepath)


def download_need_file(bookid, title):
    """Download all resources of one book into its own folder.

    Fetches up to 500 resources of the book from
    ``/api/v1/books/<bookid>/resources`` and stores each one under
    ``outputpath + title + bookid`` as ``<resource title>.<extension>``
    ('/' in the resource title is replaced by a space). Handling by ``type``:

    * ``file``: streamed from the URL returned by getres; the extension
      comes from getType(item['typeStr']). Skipped with a message when no
      extension can be derived.
    * ``package``: streamed the same way, with extension ``hep5``.
    * ``html``: the text returned by getContent is saved as ``.html``.
    * ``url``: a small ``.html`` file holding the link from getUrl.

    Files that already exist are left untouched.

    Args:
        bookid: Identifier of the book on the server.
        title: Book title; combined with bookid to form the folder name.
    """
    folder = outputpath + title + bookid
    headers = {'authorization': 'Bearer ' + token}
    url = 'http://' + dhost + \
        '/api/v1/books/' + bookid + '/resources?limit=500&offset=0&sort=seq'
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        result = json.loads(response.read())
    finally:
        response.close()
    # Loop-invariant: encode once instead of once per resource.
    bookid = bookid.encode('utf8')
    for item in result['rows']:
        rid = item.get('_id')
        if rid is None:
            print("id is none")
            continue
        rtype = item['type']
        ftitle = item['title'].replace('/', ' ')
        rid = rid.encode('utf8')
        if rtype == 'file' or rtype == 'package':
            if rtype == 'file':
                ftype = getType(item['typeStr'])
                if ftype is None:
                    # No extension could be derived; building the path
                    # would raise TypeError and abort the whole batch.
                    print(u'无法识别文件类型，跳过... ' + ftitle)
                    continue
            else:
                ftype = 'hep5'
            url = getres(bookid, rid)
            if url is None:
                continue
            r = requests.get(url.replace('https', 'http'),
                             stream=True, verify=False)
            try:
                if r.status_code == 200:
                    filepath = folder + '/' + ftitle + '.' + ftype
                    if _claim_target(folder, filepath, u'创建文件夹'):
                        _write_file(filepath, r.iter_content(chunk_size=512))
            finally:
                # Release the streamed connection even when nothing was read.
                r.close()
        elif rtype == 'html':  # the resource may be inline html
            content = getContent(bookid, rid)
            if content is not None:
                filepath = folder + '/' + ftitle + '.' + 'html'
                if _claim_target(folder, filepath, u'创建文件夹 '):
                    _write_file(filepath, [content])
        elif rtype == 'url':
            url = getUrl(bookid, rid)
            if url is not None:
                content = url + '<a href=\'' + url + '\'>访问链接</a>'
                filepath = folder + '/' + ftitle + '.' + 'html'
                if _claim_target(folder, filepath, u'创建文件夹 '):
                    _write_file(filepath, [content])


def getres(bookid, id):
    """Return the download URL of one resource, or None.

    Requests ``/api/v1/books/<bookid>/resources/<id>`` with the module-level
    token in the ``x-access-token`` header and reads ``data.downloadUrl``
    from the JSON reply.

    Returns:
        The ``downloadUrl`` value, or None when it is an empty string or
        cannot be read from the reply.
    """
    endpoint = 'http://' + dhost + '/api/v1/books/' + bookid + '/resources/' + id
    reply = urllib2.urlopen(
        urllib2.Request(endpoint, headers={'x-access-token': token}))
    payload = json.loads(reply.read())
    try:
        download_url = payload['data']['downloadUrl']
    except Exception:
        return None
    if download_url == '':
        return None
    return download_url
def getContent(bookid, id):
    """Return the inline content of one resource as UTF-8 bytes, or None.

    Requests ``/api/v1/books/<bookid>/resources/<id>`` with the module-level
    token in the ``x-access-token`` header and reads ``data.content`` from
    the JSON reply.

    Returns:
        The ``content`` value encoded as UTF-8, or None when it is an empty
        string or cannot be read or encoded.
    """
    endpoint = 'http://' + dhost + '/api/v1/books/' + bookid + '/resources/' + id
    reply = urllib2.urlopen(
        urllib2.Request(endpoint, headers={'x-access-token': token}))
    payload = json.loads(reply.read())
    try:
        body = payload['data']['content']
        if body == '':
            return None
        return body.encode('utf8')
    except Exception:
        return None
def getUrl(bookid, id):
    """Return the external link of one resource as UTF-8 bytes, or None.

    Requests ``/api/v1/books/<bookid>/resources/<id>`` with the module-level
    token in the ``x-access-token`` header and reads ``data.url`` from the
    JSON reply.

    Returns:
        The ``url`` value encoded as UTF-8, or None when it is an empty
        string or cannot be read or encoded.
    """
    endpoint = 'http://' + dhost + '/api/v1/books/' + bookid + '/resources/' + id
    reply = urllib2.urlopen(
        urllib2.Request(endpoint, headers={'x-access-token': token}))
    payload = json.loads(reply.read())
    try:
        link = payload['data']['url']
        if link == '':
            return None
        return link.encode('utf8')
    except Exception:
        return None


# Script entry: gather email, password, excel_filepath and outputpath (from
# config.ini when it exists, otherwise from console prompts), log in, process
# the workbook, then wait for Q before exiting.
if os.path.exists('./config.ini'):
    print(u"读取配置文件...")
    email = cf.get('download', 'email')
    password = cf.get('download', 'password')
    excel_filepath = cf.get('download', 'excel_filepath')
    outputpath = cf.get('download', 'outputpath')
    _login_error = u'登录错误'
else:
    print(u"配置文件不存在")
    print(u"将读取输入参数...")
    email = raw_input('email:')
    password = raw_input("password:")
    excel_filepath = raw_input("excel_filepath:")
    outputpath = raw_input("outputpath:")
    _login_error = u'登录错误，请检查登录账号及密码'

# The functions above read the module-level token set here.
token = login()
if token is None:
    print(_login_error)
elif not os.path.exists(excel_filepath):
    print(u'excel文件不存在，请检查excel文件路径')
else:
    doExcel(excel_filepath)

# Keep the console open until Q or q (key codes 81 / 113) is pressed.
print(u"按Q键退出程序")
while ord(msvcrt.getch()) not in [81, 113]:
    pass