加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
download_default_datalist.py 2.63 KB
一键复制 编辑 原始数据 按行查看 历史
sovft 提交于 2023-09-15 20:45 . 2023-9-15-19:05
import os
import logging
import json
import requests
logging.basicConfig(
    format='%(asctime)s - %(pathname)s[line:%(lineno)d] - %(levelname)s: %(message)s',
    level=logging.INFO)

# Local base directory that receives the downloaded datalist files.
DEFAULT_DATALIST_PATH = 'datalist/'
os.makedirs(DEFAULT_DATALIST_PATH, exist_ok=True)

# Fetch the JSON index that describes the datalists offered by the server.
URL_DATALIST_INDEX = "https://d.ailemon.net/asrt_assets/datalist/index.json"
rsp_index = requests.get(URL_DATALIST_INDEX)
rsp_index.encoding = 'utf-8'
if rsp_index.ok:
    logging.info('Has connected to ailemon\'s download server...')
else:
    logging.error('%s%s', 'Can not connected to ailemon\'s download server.',
                  'please check your network connection.')
    # Stop here: the body of a failed response is not the index, and parsing
    # it below would only produce a misleading JSON/KeyError failure.
    rsp_index.raise_for_status()

index_json = json.loads(rsp_index.text)
# The index carries its own status fields in addition to the HTTP status.
if index_json['status_code'] != 200:
    raise Exception(index_json['status_message'])

body = index_json['body']
logging.info('start to download datalist from ailemon\'s download server...')

url_prefix = body['url_prefix']
datalist_count = len(body['datalist'])
# Show a numbered menu; the final number (== count) stands for "all".
for index, datalist in enumerate(body['datalist']):
    print(index, datalist['name'])
print(datalist_count, 'all datalist')

num = input('Please choose which you select: (default all)').strip()
if not num:
    # Empty (or whitespace-only) input selects every datalist.
    num = datalist_count
else:
    num = int(num)
    # Reject values outside the menu: a negative number would silently index
    # from the end of the list and a too-large one would raise IndexError
    # only when the download is dispatched.
    if not 0 <= num <= datalist_count:
        raise ValueError(
            'selection must be between 0 and %d, got %d' % (datalist_count, num))
def deal_download(datalist_item, url_prefix_str, datalist_path):
    """
    Download every file of one datalist entry into its own directory.

    Each file is requested from
    ``url_prefix_str + datalist_item['name'] + '/' + filename`` and stored at
    ``os.path.join(datalist_path, datalist_item['name'], filename)``.

    Args:
        datalist_item: mapping with a ``'name'`` entry (used both as the
            remote sub-path and as the local directory name) and a
            ``'filelist'`` entry (iterable of file names).
        url_prefix_str: URL prefix to which the name and file name are appended.
        datalist_path: local base directory for the downloaded files.

    A download whose HTTP response is not OK is logged and skipped; nothing
    is written for it, so an error body is never stored as a datalist file
    and an existing local copy is left untouched.
    """
    logging.info('%s%s', 'start to download datalist ', datalist_item['name'])
    save_path = os.path.join(datalist_path, datalist_item['name'])
    if not os.path.exists(save_path):
        os.makedirs(save_path)
        logging.info('%s`%s`', 'Created directory ', save_path)

    for filename in datalist_item['filelist']:
        tmp_url = url_prefix_str + datalist_item['name'] + '/' + filename
        save_filename = os.path.join(save_path, filename)
        rsp_listfile = requests.get(tmp_url)

        # Check the status BEFORE touching the disk so that a failed request
        # cannot create or overwrite a local file with the error response.
        if not rsp_listfile.ok:
            logging.error('%s%s%s%s%s', 'Can not download ', filename,
                          ' from ailemon\'s download server. ',
                          'http status ok is ', str(rsp_listfile.ok))
            continue

        with open(save_filename, "wb") as file_pointer:
            file_pointer.write(rsp_listfile.content)
        logging.info('%s `%s` %s', 'Download', filename, 'complete')
# Dispatch the download: a selection equal to the number of datalists means
# "all of them"; any other value is used as an index into the list.
available_datalists = body['datalist']
if num != len(available_datalists):
    deal_download(available_datalists[num], body['url_prefix'],
                  DEFAULT_DATALIST_PATH)
else:
    for position in range(len(available_datalists)):
        deal_download(available_datalists[position], body['url_prefix'],
                      DEFAULT_DATALIST_PATH)

# Point the user at the page where the datasets themselves can be obtained.
logging.info(
    '%s%s%s',
    'Datalist files download complete. ',
    'Please remember to download these datasets from ',
    body['dataset_download_page_url'],
)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化