加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
baidu_ocr.py 6.39 KB
一键复制 编辑 原始数据 按行查看 历史
private-user 提交于 2021-01-28 16:35 . bug fix
# encoding:utf-8
import requests
import base64
import time, datetime
import sys
import os
import json
import queue
import random
from threading import Thread
import threading
# https://cloud.baidu.com/doc/OCR/s/fk3h7xu7h
# --- Credentials -----------------------------------------------------------
# Token appended to the OCR request URL as ``access_token``.
# NOTE(review): 'xxxxxxxx' looks like a placeholder -- confirm before running.
ACCESS_TOKEN = "xxxxxxxx"

# --- Files -----------------------------------------------------------------
# INPUT is read line by line to build the task list, OUTPUT receives one
# '|'-separated record per processed item, LOG receives the log lines.
INPUT = "idcard_2020.db"
OUTPUT = "idcard_output_2020.db"
LOG = "idcard_2020.log"

# --- Queues shared between the worker threads and the writer thread --------
# task_queue holds the items to process, result_queue the finished records.
task_queue, result_queue = queue.Queue(), queue.Queue()
def parse(seq, fpath, imgtype, timeout=60):
    """Send one image to the Baidu ID-card OCR endpoint.

    Args:
        seq: Identifier of the item being processed; only used in the log
            line written when the request fails.
        fpath: Path of the image file to upload.
        imgtype: Value sent as ``id_card_side`` (callers in this file pass
            'front' or 'back').
        timeout: Seconds passed to ``requests.post`` so that a stalled
            connection cannot block the calling worker thread forever.

    Returns:
        The ``requests`` response object, or ``None`` when the HTTP request
        raised an exception (the error is logged).

    Raises:
        OSError: If the image file cannot be opened or read.
    """
    request_url = (
        "https://aip.baidubce.com/rest/2.0/ocr/v1/idcard"
        + "?access_token=" + ACCESS_TOKEN
    )
    headers = {'content-type': 'application/x-www-form-urlencoded'}

    # The context manager closes the file even if read() fails.
    with open(fpath, 'rb') as image_file:
        img = base64.b64encode(image_file.read())

    params = {"id_card_side": imgtype, "image": img}
    try:
        return requests.post(
            request_url, data=params, headers=headers, timeout=timeout
        )
    except Exception as exc:
        # Best effort: a failed request must not kill the worker, but the
        # cause is recorded instead of being silently dropped.
        log('error when deal:' + str(seq) + ' ' + repr(exc))
        return None
def log(msg):
    """Append one line to the LOG file.

    The line has the form ``[YYYY-mm-dd HH:MM:SS][thread-name]message``.

    Args:
        msg: Text to record; it is converted with ``str()`` before writing.
    """
    timestamp = time.strftime("%Y-%m-%d %H:%M:%S", time.localtime())
    # ``.name`` replaces the deprecated ``getName()`` accessor.
    thread_name = threading.current_thread().name
    # Messages may contain non-ASCII text (OCR responses are logged), so the
    # encoding is fixed rather than left to the platform default; ``with``
    # guarantees the handle is closed even when the write fails.
    with open(LOG, 'a', encoding='utf-8') as log_file:
        log_file.write(
            '[' + timestamp + '][' + thread_name + ']' + str(msg) + '\n'
        )
def do_deal():
    """Worker loop: take items from ``task_queue`` and queue their records.

    Runs forever. Each item is passed to ``deal``; a successful record is put
    on ``result_queue``. Items for which ``deal`` returns ``None`` or raises
    are logged/skipped and produce no record.
    """
    while True:
        try:
            # Blocking get with a timeout: waits up to one second for work
            # instead of polling with get_nowait() followed by sleep(1).
            item = task_queue.get(timeout=1)
        except queue.Empty:
            log("task queue is empty, sleep a while.")
            continue

        try:
            data = deal(item)
        except Exception as exc:
            # One bad item must not terminate the worker thread.
            log('deal failed for ' + repr(item) + ': ' + repr(exc))
            continue

        # deal() returns None when the OCR request failed; forwarding None
        # would make the writer fail on string concatenation.
        if data is None:
            continue
        result_queue.put(data)
def do_save():
    """Writer loop: append every record from ``result_queue`` to OUTPUT.

    Runs forever. Each record is logged and then appended verbatim to the
    OUTPUT file. Entries that are not strings (for example ``None``) are
    logged and skipped.
    """
    while True:
        try:
            # Waits up to one second for a record instead of polling with
            # get_nowait() followed by sleep(1).
            record = result_queue.get(timeout=1)
        except queue.Empty:
            log("result queue is empty. sleep a while.")
            continue

        # A non-string entry would raise TypeError below and permanently
        # stop this thread, so it is rejected explicitly.
        if not isinstance(record, str):
            log('skip invalid record:' + repr(record))
            continue

        log('record:' + record)
        with open(OUTPUT, 'a') as output_file:
            output_file.write(record)
# Output columns, in record order, paired with the key looked up inside the
# ``words_result`` object of the OCR response.
_IDCARD_FIELDS = (
    ('name', '姓名'),
    ('nation', '民族'),
    ('address', '住址'),
    ('idcard', '公民身份号码'),
    ('born', '出生'),
    ('sex', '性别'),
    ('expire', '失效日期'),
    ('signorg', '签发机关'),
    ('signdate', '签发日期'),
)


def _recognize_side(seq, path, side):
    """OCR one image and return its ``words_result`` dict, or None to abort.

    Returns ``None`` when there is no response or the response text is blank;
    returns an empty dict when the text is not a JSON object carrying a
    ``words_result`` dict.
    """
    # 'fronpath' (sic) is kept so existing log lines stay searchable.
    path_label = 'fronpath' if side == 'front' else side + 'path'
    log('seq:[' + seq + ']' + path_label + ':' + path)

    resp = parse(seq, path, side)
    if resp is None:
        log('response is none')
        return None

    text = resp.text
    log('seq:[' + seq + ']resp_' + side + '_text:' + text)
    if text.strip() == '':
        log('response ' + side + ' is empty')
        return None

    try:
        obj = json.loads(text)
    except ValueError:
        log('parse ' + side + ' json error')
        return {}

    words_result = obj.get('words_result') if isinstance(obj, dict) else None
    return words_result if isinstance(words_result, dict) else {}


def _merge_fields(values, words_result):
    """Fill every still-empty entry of ``values`` from ``words_result``."""
    for field, key in _IDCARD_FIELDS:
        if values[field] != '':
            continue
        entry = words_result.get(key)
        # Only string values are accepted: anything else would break the
        # '|'.join that builds the record.
        if isinstance(entry, dict) and isinstance(entry.get('words'), str):
            values[field] = entry['words']


def deal(f):
    """Build the output record for one ID card.

    ``f`` is the path of the front image. The back image path is derived by
    replacing 'idcard_front.jpg' with 'idcard_back.jpg'. Each image that
    exists on disk is sent to ``parse``; a field is taken from the first
    response that provides it (front before back).

    Args:
        f: '/'-separated path; components 4 and 5 are concatenated to form
            the record identifier.

    Returns:
        A '|'-joined string of the identifier, the nine fields and a trailing
        newline element, or ``None`` when a request returned no response or
        a blank body.

    Raises:
        IndexError: If ``f`` has fewer than six '/'-separated components.
    """
    parts = f.split('/')
    seq = parts[4] + parts[5]

    values = {field: '' for field, _ in _IDCARD_FIELDS}
    front_path = f
    back_path = front_path.replace('idcard_front.jpg', 'idcard_back.jpg')

    # NOTE(review): a response without usable fields (e.g. a JSON error
    # payload) still yields a record with empty columns -- confirm that
    # this is the intended behaviour.
    for side, path in (('front', front_path), ('back', back_path)):
        if not os.path.exists(path):
            continue
        words_result = _recognize_side(seq, path, side)
        if words_result is None:
            return None
        _merge_fields(values, words_result)

    arr = [seq] + [values[field] for field, _ in _IDCARD_FIELDS] + ['\n']
    record = '|'.join(arr)
    log(record)
    return record
def _task_key(path):
    """Return components 4 and 5 of a '/'-separated path joined, or None."""
    parts = path.split('/')
    if len(parts) < 6:
        return None
    return parts[4] + parts[5]


def main():
    """Queue every unprocessed INPUT line and start the worker threads.

    The first '|'-separated column of each OUTPUT line identifies an item
    that is already done. INPUT lines whose key (path components 4 and 5
    concatenated) is not among those are put on ``task_queue``. Six
    ``do_deal`` threads and one ``do_save`` thread are then started.
    """
    # A set gives O(1) membership tests while filtering the input.
    try:
        with open(OUTPUT) as output_file:
            done = {line.split('|')[0] for line in output_file}
    except FileNotFoundError:
        # No output file yet (first run): nothing has been processed.
        done = set()
    log('finished create done list')

    # build task list, skip done before
    tasks_list = []
    with open(INPUT) as input_file:
        for raw_line in input_file:
            path = raw_line.strip()
            if not path:
                continue
            key = _task_key(path)
            if key is None:
                # Too few path components to derive a key.
                log('skip malformed input line:' + path)
                continue
            if key not in done:
                tasks_list.append(path)
    log('finished create task list')
    log('tasklen:' + str(len(tasks_list)))

    for task in tasks_list:
        task_queue.put(task)
    log('finished create task queue')

    for i in range(6):
        Thread(target=do_deal, name='producer' + str(i)).start()
    Thread(target=do_save, name='consumer1').start()


if __name__ == '__main__':
    main()
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化