加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
downloadFile.py 10.62 KB
一键复制 编辑 原始数据 按行查看 历史
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import datetime
import json
import os
import shutil
import urllib
import urllib2
import zipfile
import os.path
import requests
import re
import openpyxl
from poster.encode import multipart_encode
from poster.streaminghttp import register_openers
import ConfigParser
import sys
import msvcrt
import base64
# Python 2 only: re-import sys so that setdefaultencoding() is callable, then
# make UTF-8 the process-wide default encoding for implicit str/unicode
# conversions. The rest of this script mixes byte strings and unicode freely.
reload(sys)
sys.setdefaultencoding('utf-8')
# Parser for the optional 'config.ini' in the current working directory; the
# script section at the bottom of the file reads the 'download' section from it.
cf = ConfigParser.ConfigParser()
cf.read('config.ini')
# Host (without scheme) that every API URL in this file is built from.
# Alternative host kept for reference:
# dhost = 'localhost:4030'
dhost = '2d.hep.com.cn'
# NOTE(review): presumably installs poster's streaming HTTP handlers as the
# global urllib2 opener -- confirm against the poster documentation.
register_openers()
# Placeholder; replaced by the value returned from login() in the script
# section at the bottom of the file.
token = ''
def login():
    """Log in with the module-level ``email``/``password`` and return a token.

    Posts the credentials as a form to ``http://<dhost>/auth/local`` and reads
    ``data.token`` from the JSON reply.

    Returns:
        The token, or None when the request fails (HTTP error status or
        network error), the reply is not JSON, or the reply carries no
        ``data``/``token``. Callers test the result against None to report a
        login failure.
    """
    form = {
        "email": email,
        "password": password
    }
    request = urllib2.Request(
        'http://' + dhost + '/auth/local', data=urllib.urlencode(form))
    try:
        response = urllib2.urlopen(request)
    except urllib2.URLError:
        # urlopen raises (HTTPError is a URLError subclass) instead of
        # returning a response for error statuses; without this the callers'
        # "login failed" branch could never run for a rejected login.
        return None
    try:
        result = json.loads(response.read())
    except ValueError:
        # Body was not valid JSON.
        return None
    finally:
        response.close()
    data = result.get('data')
    if data is None:
        return None
    return data.get('token')
# Matches "<...>TEXT<...>" from the start of the string and captures the
# whitespace-free TEXT between the two tags as group "f". (The original passed
# re.VERBOSE; the pattern has no whitespace or '#', so the flag had no effect.)
_TYPE_STR_PATTERN = re.compile(r"<.*>(?P<f>\S+)<.*>")


def getType(typeStr):
    """Extract a file extension from a tagged type label.

    ``typeStr`` is expected to look like ``<tag>文档pdf</tag>``: the text
    between the tags must start with the two characters ``文档``; whatever
    follows them is returned (``pdf`` in the example).

    Args:
        typeStr: Tagged label as text or UTF-8 encoded bytes; may be None.

    Returns:
        The text after the ``文档`` prefix, or None when ``typeStr`` is
        empty/None, does not match the tag pattern, or lacks the prefix.
    """
    if not typeStr:
        return None
    # Decode only real byte strings; calling .decode() on text relies on the
    # interpreter's default encoding and does not exist on Python 3 str.
    if isinstance(typeStr, bytes):
        typeStr = typeStr.decode('utf8')
    matched = _TYPE_STR_PATTERN.match(typeStr)
    if matched is None:
        return None
    label = matched.group('f')
    if label[0:2] != u'文档':
        return None
    return label[2:]
def doExcel(path):
    """Look up and download the book referenced by each row of a workbook.

    Reads the first cell of every row in worksheet ``Sheet1``. The cell text
    is split on ``/``: the first part is used as the ``wuliao`` query value;
    if it is empty, the second part is used as the ``wbs`` query value. The
    first book returned by ``/api/v1/books/guess`` is handed to
    ``download_need_file``.

    Rows whose first cell is empty, or that yield neither value, are skipped.

    Args:
        path: Path of the .xlsx workbook to read.
    """
    wb = openpyxl.load_workbook(path)
    # Only the worksheet named 'Sheet1' is processed.
    sheet = wb['Sheet1']
    headers = {'authorization': 'Bearer ' + token}
    for row in sheet.rows:
        cell_value = row[0].value
        if cell_value is None:
            # Empty cell: str(None) would otherwise be queried as "None".
            continue
        parts = str(cell_value).split('/')
        wuliao = parts[0]
        # A cell without '/' has no second part; treat it as empty instead
        # of raising IndexError.
        wbs = parts[1] if len(parts) > 1 else ''
        if wuliao != '':
            query = urllib.urlencode({'wuliao': wuliao})
        elif wbs != '':
            query = urllib.urlencode({'wbs': wbs})
        else:
            # Nothing to search for; never reuse the previous row's URL.
            continue
        url = 'http://' + dhost + '/api/v1/books/guess?' + query
        request = urllib2.Request(url, headers=headers)
        response = urllib2.urlopen(request)
        try:
            result = json.loads(response.read())
        finally:
            response.close()
        rows = result['rows']
        if len(rows) > 0:
            book = rows[0]
            download_need_file(book['_id'], book['title'])
def _resource_path(folder, ftitle, ftype):
    """Build the output file path; the extension is omitted when ftype is None."""
    if ftype is None:
        # getType() found no extension; concatenating None would raise.
        return folder + '/' + ftitle
    return folder + '/' + ftitle + '.' + ftype


def _prepare_target(folder, filepath, created_label):
    """Return True if filepath must be written, creating folder when missing."""
    if os.path.exists(filepath):
        print(u'文件已存在,跳过... '+filepath)
        return False
    if not os.path.exists(folder):
        os.makedirs(folder + '/')
        print(created_label + folder + '/')
    print(u'正在下载... '+filepath)
    return True


def _save_download(bookid, rid, folder, ftitle, ftype):
    """Stream the resource's download URL into its output file."""
    url = getres(bookid, rid)
    if url is None:
        return
    r = requests.get(url.replace('https', 'http'), stream=True, verify=False)
    try:
        if r.status_code != 200:
            return
        filepath = _resource_path(folder, ftitle, ftype)
        if not _prepare_target(folder, filepath, u'创建文件夹'):
            return
        with open(filepath, "wb") as f:
            for chunk in r.iter_content(chunk_size=512):
                if chunk:
                    f.write(chunk)
    finally:
        # stream=True keeps the connection open until the response is closed.
        r.close()


def _save_content(folder, ftitle, content):
    """Write already-fetched content to '<ftitle>.html' in folder."""
    filepath = _resource_path(folder, ftitle, 'html')
    if _prepare_target(folder, filepath, u'创建文件夹 '):
        with open(filepath, "wb") as f:
            f.write(content)


def download_need_file(bookid, title):
    """Download every resource of a book into ``outputpath + title + bookid``.

    Fetches up to 500 resources of the book (sorted by ``seq``) and saves
    each one according to its ``type``:

    * ``file``    -- streamed from its download URL; extension from getType().
    * ``package`` -- streamed from its download URL with extension ``hep5``.
    * ``html``    -- its content written to an ``.html`` file.
    * ``url``     -- an ``.html`` file containing the URL and a link to it.

    Files that already exist are skipped. Resources without ``_id`` are
    reported with "id is none". Other resource types are ignored.

    Args:
        bookid: Identifier of the book.
        title: Book title; combined with ``bookid`` to name the output folder.
    """
    title = title+bookid
    folder = outputpath + title
    headers = {'authorization': 'Bearer ' + token}
    url = 'http://' + dhost + \
        '/api/v1/books/'+bookid+'/resources?limit=500&offset=0&sort=seq'
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        result = json.loads(response.read())
    finally:
        response.close()
    # The per-resource helpers receive UTF-8 encoded ids.
    bookid = bookid.encode('utf8')
    for item in result['rows']:
        rid = item.get('_id')
        if rid is None:
            print("id is none")
            continue
        rtype = item['type']
        # '/' in a resource title would otherwise be read as a path separator.
        ftitle = item['title'].replace('/', ' ')
        rid = rid.encode('utf8')
        if rtype == 'file':
            ftype = getType(item['typeStr'])
            _save_download(bookid, rid, folder, ftitle, ftype)
        elif rtype == 'package':
            _save_download(bookid, rid, folder, ftitle, 'hep5')
        elif rtype == 'html':
            # The resource may be inline HTML rather than a downloadable file.
            content = getContent(bookid, rid)
            if content is not None:
                _save_content(folder, ftitle, content)
        elif rtype == 'url':
            link = getUrl(bookid, rid)
            if link is not None:
                _save_content(
                    folder, ftitle, link+'<a href=\''+link+'\'>访问链接</a>')
def _fetch_resource_field(bookid, rid, field):
    """Fetch one resource and return ``data[field]`` from the JSON reply.

    Returns None when the reply has no ``data``/``field`` entry or when the
    value is an empty string.
    """
    headers = {'x-access-token': token}
    url = 'http://' + dhost + '/api/v1/books/'+bookid+'/resources/'+rid
    request = urllib2.Request(url, headers=headers)
    response = urllib2.urlopen(request)
    try:
        result = json.loads(response.read())
    finally:
        response.close()
    try:
        value = result['data'][field]
    except (KeyError, TypeError):
        # Missing key, or the reply / its 'data' entry is not a mapping.
        return None
    if value == '':
        return None
    return value


def _utf8_or_none(value):
    """Return value encoded as UTF-8, or None if it cannot be encoded."""
    if value is None or not hasattr(value, 'encode'):
        return None
    return value.encode('utf8')


def getres(bookid, id):
    """Return the ``downloadUrl`` of a resource, or None if absent or empty.

    Args:
        bookid: Identifier of the book owning the resource.
        id: Identifier of the resource.
    """
    return _fetch_resource_field(bookid, id, 'downloadUrl')


def getContent(bookid, id):
    """Return the UTF-8 encoded ``content`` of a resource, or None.

    Args:
        bookid: Identifier of the book owning the resource.
        id: Identifier of the resource.
    """
    return _utf8_or_none(_fetch_resource_field(bookid, id, 'content'))


def getUrl(bookid, id):
    """Return the UTF-8 encoded ``url`` of a resource, or None.

    Args:
        bookid: Identifier of the book owning the resource.
        id: Identifier of the resource.
    """
    return _utf8_or_none(_fetch_resource_field(bookid, id, 'url'))
# Script entry: obtain email, password, excel_filepath and outputpath (from
# config.ini when it exists, otherwise interactively), log in, process the
# workbook, then wait for the user to press Q before exiting.
if os.path.exists('./config.ini'):
    print(u"读取配置文件...")
    email = cf.get('download', 'email')
    password = cf.get('download', 'password')
    excel_filepath = cf.get('download', 'excel_filepath')
    outputpath = cf.get('download', 'outputpath')
    _login_error_text = u'登录错误'
else:
    print(u"配置文件不存在")
    print(u"将读取输入参数...")
    email = raw_input('email:')
    password = raw_input("password:")
    excel_filepath = raw_input("excel_filepath:")
    outputpath = raw_input("outputpath:")
    _login_error_text = u'登录错误,请检查登录账号及密码'

token = login()
if token is None:
    print(_login_error_text)
elif not os.path.exists(excel_filepath):
    print(u'excel文件不存在,请检查excel文件路径')
else:
    doExcel(excel_filepath)

# Every outcome ends the same way: block until 'Q' (81) or 'q' (113) is pressed.
print(u"按Q键退出程序")
while ord(msvcrt.getch()) not in [81, 113]:
    pass
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化