# -*- coding: utf-8 -*-
from PIL import Image
import pytesseract
import cv2
import numpy as np
import re
from multiprocessing import Pool, Queue, Lock, Process, freeze_support
import time
#pytesseract.pytesseract.tesseract_cmd = 'C:\\Program Files (x86)\\Tesseract-OCR\\tesseract.exe'
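# Scale factor relative to the 3840-px-wide reference templates; the working width is
# pixel_x (1280), which selects the matching '*_mask_1280.jpg' template files below.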
x = 1280.00 / 3840.00
pixel_x = int(x * 3840)
print(x, pixel_x)
# mode 0: recognize name, date of birth and ID number; mode 1: recognize all fields
def idcardocr(imgname, mode=1):
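    """OCR a Chinese ID-card image and return a dict of recognized fields.

    Note: despite the name, imgname is an already-loaded image (cv2.UMat), not a path.
    mode=1 extracts name, sex, nation, address, ID number and birth date;
    mode=0 extracts only name, ID number and birth date (the other keys stay empty).
    """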
print(u'进入身份证光学识别流程...')
if mode==1:
# generate_mask(x)
img_data_gray, img_org = img_resize_gray(imgname)
result_dict = dict()
name_pic = find_name(img_data_gray, img_org)
# showimg(name_pic)
# print 'name'
result_dict['name'] = get_name(name_pic)
# print result_dict['name']
sex_pic = find_sex(img_data_gray, img_org)
# showimg(sex_pic)
# print 'sex'
result_dict['sex'] = get_sex(sex_pic)
# print result_dict['sex']
nation_pic = find_nation(img_data_gray, img_org)
# showimg(nation_pic)
# print 'nation'
result_dict['nation'] = get_nation(nation_pic)
# print result_dict['nation']
address_pic = find_address(img_data_gray, img_org)
# showimg(address_pic)
# print 'address'
result_dict['address'] = get_address(address_pic)
# print result_dict['address']
idnum_pic = find_idnum(img_data_gray, img_org)
# showimg(idnum_pic)
# print 'idnum'
result_dict['idnum'], result_dict['birth'] = get_idnum_and_birth(idnum_pic)
# print result_dict['idnum']
elif mode==0:
# generate_mask(x)
img_data_gray, img_org = img_resize_gray(imgname)
result_dict = dict()
name_pic = find_name(img_data_gray, img_org)
# showimg(name_pic)
# print 'name'
result_dict['name'] = get_name(name_pic)
# print result_dict['name']
idnum_pic = find_idnum(img_data_gray, img_org)
# showimg(idnum_pic)
# print 'idnum'
result_dict['idnum'], result_dict['birth'] = get_idnum_and_birth(idnum_pic)
result_dict['sex']=''
result_dict['nation']=''
result_dict['address']=''
    else:
        # Any other mode would fall through with result_dict undefined, so fail explicitly.
        raise ValueError(u"模式选择错误!")
#showimg(img_data_gray)
return result_dict
def generate_mask(x):
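    """Scale the original full-size mask images by the global factor x and save them
    with a '_<pixel_x>' suffix so the find_* functions can load size-matched templates."""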
name_mask_pic = cv2.UMat(cv2.imread('name_mask.jpg'))
sex_mask_pic = cv2.UMat(cv2.imread('sex_mask.jpg'))
nation_mask_pic = cv2.UMat(cv2.imread('nation_mask.jpg'))
birth_mask_pic = cv2.UMat(cv2.imread('birth_mask.jpg'))
year_mask_pic = cv2.UMat(cv2.imread('year_mask.jpg'))
month_mask_pic = cv2.UMat(cv2.imread('month_mask.jpg'))
day_mask_pic = cv2.UMat(cv2.imread('day_mask.jpg'))
address_mask_pic = cv2.UMat(cv2.imread('address_mask.jpg'))
idnum_mask_pic = cv2.UMat(cv2.imread('idnum_mask.jpg'))
name_mask_pic = img_resize_x(name_mask_pic)
sex_mask_pic = img_resize_x(sex_mask_pic)
nation_mask_pic = img_resize_x(nation_mask_pic)
birth_mask_pic = img_resize_x(birth_mask_pic)
year_mask_pic = img_resize_x(year_mask_pic)
month_mask_pic = img_resize_x(month_mask_pic)
day_mask_pic = img_resize_x(day_mask_pic)
address_mask_pic = img_resize_x(address_mask_pic)
idnum_mask_pic = img_resize_x(idnum_mask_pic)
cv2.imwrite('name_mask_%s.jpg'%pixel_x, name_mask_pic)
cv2.imwrite('sex_mask_%s.jpg' %pixel_x, sex_mask_pic)
cv2.imwrite('nation_mask_%s.jpg' %pixel_x, nation_mask_pic)
cv2.imwrite('birth_mask_%s.jpg' %pixel_x, birth_mask_pic)
cv2.imwrite('year_mask_%s.jpg' % pixel_x, year_mask_pic)
cv2.imwrite('month_mask_%s.jpg' % pixel_x, month_mask_pic)
cv2.imwrite('day_mask_%s.jpg' % pixel_x, day_mask_pic)
cv2.imwrite('address_mask_%s.jpg' %pixel_x, address_mask_pic)
cv2.imwrite('idnum_mask_%s.jpg' %pixel_x, idnum_mask_pic)
# Used for generating the template masks
def img_resize_x(imggray):
# print 'dheight:%s' % dheight
crop = imggray
size = crop.get().shape
dheight = int(size[0]*x)
dwidth = int(size[1]*x)
crop = cv2.resize(src=crop, dsize=(dwidth, dheight), interpolation=cv2.INTER_CUBIC)
return crop
# In the idcardocr pipeline this resize is based on a target height; used by the get_* functions
def img_resize(imggray, dheight):
# print 'dheight:%s' % dheight
crop = imggray
size = crop.get().shape
height = size[0]
width = size[1]
width = width * dheight / height
crop = cv2.resize(src=crop, dsize=(int(width), dheight), interpolation=cv2.INTER_CUBIC)
return crop
def img_resize_gray(imgorg):
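    """Resize the card image (cv2.UMat) to a width of int(3840*x) pixels, keeping the
    aspect ratio, and return (contrast-stretched grayscale image, resized colour image)."""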
#imgorg = cv2.imread(imgname)
crop = imgorg
size = cv2.UMat.get(crop).shape
# print size
height = size[0]
width = size[1]
    # The crop offsets used below were tuned against a 3840-px-wide image
height = int(height * 3840 * x / width)
# print height
crop = cv2.resize(src=crop, dsize=(int(3840 * x), height), interpolation=cv2.INTER_CUBIC)
return hist_equal(cv2.cvtColor(crop, cv2.COLOR_BGR2GRAY)), crop
def find_name(crop_gray, crop_org):
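    """Template-match the printed name label on the card and return the region to its
    right (as a cv2.UMat) that should contain the holder's name; the other find_*
    helpers follow the same pattern with different crop offsets."""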
template = cv2.UMat(cv2.imread('name_mask_%s.jpg'%pixel_x, 0))
# showimg(crop_org)
w, h = cv2.UMat.get(template).shape[::-1]
res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
# print(max_loc)
top_left = (max_loc[0] + w, max_loc[1] - int(20*x))
bottom_right = (top_left[0] + int(700*x), top_left[1] + int(300*x))
result = cv2.UMat.get(crop_org)[top_left[1]-10:bottom_right[1], top_left[0]-10:bottom_right[0]]
cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
# showimg(result)
return cv2.UMat(result)
def find_sex(crop_gray, crop_org):
template = cv2.UMat(cv2.imread('sex_mask_%s.jpg'%pixel_x, 0))
# showimg(template)
w, h = cv2.UMat.get(template).shape[::-1]
res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = (max_loc[0] + w, max_loc[1] - int(20*x))
bottom_right = (top_left[0] + int(300*x), top_left[1] + int(300*x))
result = cv2.UMat.get(crop_org)[top_left[1]-10:bottom_right[1], top_left[0]-10:bottom_right[0]]
cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#showimg(crop_gray)
return cv2.UMat(result)
def find_nation(crop_gray, crop_org):
template = cv2.UMat(cv2.imread('nation_mask_%s.jpg'%pixel_x, 0))
#showimg(template)
w, h = cv2.UMat.get(template).shape[::-1]
res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = (max_loc[0] + w - int(20*x), max_loc[1] - int(20*x))
bottom_right = (top_left[0] + int(500*x), top_left[1] + int(300*x))
result = cv2.UMat.get(crop_org)[top_left[1]-10:bottom_right[1], top_left[0]-10:bottom_right[0]]
cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#showimg(crop_gray)
return cv2.UMat(result)
# def find_birth(crop_gray, crop_org):
# template = cv2.UMat(cv2.imread('birth_mask_%s.jpg'%pixel_x, 0))
# # showimg(template)
# w, h = cv2.UMat.get(template).shape[::-1]
# res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
# #showimg(crop_gray)
# min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
# top_left = (max_loc[0] + w, max_loc[1] - int(20*x))
# bottom_right = (top_left[0] + int(1500*x), top_left[1] + int(300*x))
# # The result must be cropped before drawing the rectangle
# date_org = cv2.UMat.get(crop_org)[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
# date = cv2.cvtColor(date_org, cv2.COLOR_BGR2GRAY)
# cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
# # cv2.imwrite('date.png',date)
#
# # Extract the year
# template = cv2.UMat(cv2.imread('year_mask_%s.jpg'%pixel_x, 0))
# year_res = cv2.matchTemplate(date, template, cv2.TM_CCOEFF_NORMED)
# min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(year_res)
# bottom_right = (max_loc[0]+int(20*x), int(300*x))
# top_left = (0, 0)
# year = date_org[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
# # cv2.imwrite('year.png',year)
# cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#
# # Extract the month
# template = cv2.UMat(cv2.imread('month_mask_%s.jpg'%pixel_x, 0))
# month_res = cv2.matchTemplate(date, template, cv2.TM_CCOEFF_NORMED)
# min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(month_res)
# bottom_right = (max_loc[0]+int(40*x), int(300*x))
# top_left = (max_loc[0] - int(220*x), 0)
# month = date_org[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
# # cv2.imwrite('month.png',month)
# cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#
# # Extract the day
# template = cv2.UMat(cv2.imread('day_mask_%s.jpg'%pixel_x, 0))
# day_res = cv2.matchTemplate(date, template, cv2.TM_CCOEFF_NORMED)
# min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(day_res)
# bottom_right = (max_loc[0]+int(20*x), int(300*x))
# top_left = (max_loc[0] - int(220*x), 0)
# day = date_org[top_left[1]:bottom_right[1], top_left[0]:bottom_right[0]]
# # cv2.imwrite('day.png',day)
# cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
# showimg(crop_gray)
# return cv2.UMat(year), cv2.UMat(month), cv2.UMat(day)
def find_address(crop_gray, crop_org):
template = cv2.UMat(cv2.imread('address_mask_%s.jpg'%pixel_x, 0))
# showimg(template)
#showimg(crop_gray)
w, h = cv2.UMat.get(template).shape[::-1]
#t1 = round(time.time()*1000)
res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
#t2 = round(time.time()*1000)
#print 'time:%s'%(t2-t1)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = (max_loc[0] + w, max_loc[1] - int(20*x))
bottom_right = (top_left[0] + int(1700*x), top_left[1] + int(550*x))
result = cv2.UMat.get(crop_org)[top_left[1]-10:bottom_right[1], top_left[0]-10:bottom_right[0]]
cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#showimg(crop_gray)
return cv2.UMat(result)
def find_idnum(crop_gray, crop_org):
template = cv2.UMat(cv2.imread('idnum_mask_%s.jpg'%pixel_x, 0))
# showimg(template)
#showimg(crop_gray)
w, h = cv2.UMat.get(template).shape[::-1]
res = cv2.matchTemplate(crop_gray, template, cv2.TM_CCOEFF_NORMED)
min_val, max_val, min_loc, max_loc = cv2.minMaxLoc(res)
top_left = (max_loc[0] + w, max_loc[1] - int(20*x))
bottom_right = (top_left[0] + int(2300*x), top_left[1] + int(300*x))
result = cv2.UMat.get(crop_org)[top_left[1]-10:bottom_right[1], top_left[0]-10:bottom_right[0]]
cv2.rectangle(crop_gray, top_left, bottom_right, 255, 2)
#showimg(crop_gray)
return cv2.UMat(result)
def showimg(img):
    cv2.namedWindow("contours", 0)
    cv2.resizeWindow("contours", 1280, 720)
cv2.imshow("contours", img)
cv2.waitKey()
# psm modes:
# 0 Orientation and script detection (OSD) only.
# 1 Automatic page segmentation with OSD.
# 2 Automatic page segmentation, but no OSD, or OCR.
# 3 Fully automatic page segmentation, but no OSD. (Default)
# 4 Assume a single column of text of variable sizes.
# 5 Assume a single uniform block of vertically aligned text.
# 6 Assume a single uniform block of text.
# 7 Treat the image as a single text line.
# 8 Treat the image as a single word.
# 9 Treat the image as a single word in a circle.
# 10 Treat the image as a single character.
# 11 Sparse text. Find as much text as possible in no particular order.
# 12 Sparse text with OSD.
# 13 Raw line. Treat the image as a single text line,
# bypassing hacks that are Tesseract-specific
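# For example, get_name below passes '--psm 7' (single text line), while get_address
# uses '--psm 6' (single uniform block) for the multi-line address field.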
def get_name(img):
# cv2.imshow("method3", img)
# cv2.waitKey()
print('name')
    _, _, red = cv2.split(img)  # cv2.split automatically converts the UMat back to a Mat
red = cv2.UMat(red)
red = hist_equal(red)
red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 151, 50)
# red = cv2.medianBlur(red, 3)
red = img_resize(red, 150)
img = img_resize(img, 150)
# showimg(red)
# cv2.imwrite('name.png', red)
# img2 = Image.open('address.png')
# img = Image.fromarray(cv2.UMat.get(red).astype('uint8'))
#return get_result_vary_length(red, 'chi_sim', img, '-psm 7')
return get_result_vary_length(red, 'chi_sim', img, '--psm 7')
# return punc_filter(pytesseract.image_to_string(img, lang='chi_sim', config='-psm 13').replace(" ",""))
def get_sex(img):
_, _, red = cv2.split(img)
print('sex')
red = cv2.UMat(red)
red = hist_equal(red)
red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
# red = cv2.medianBlur(red, 3)
# cv2.imwrite('address.png', img)
# img2 = Image.open('address.png')
red = img_resize(red, 150)
# cv2.imwrite('sex.png', red)
# img = Image.fromarray(cv2.UMat.get(red).astype('uint8'))
#return get_result_fix_length(red, 1, 'sex', '-psm 10')
return get_result_fix_length(red, 1, 'chi_sim', '--psm 10')
# return pytesseract.image_to_string(img, lang='sex', config='-psm 10').replace(" ","")
def get_nation(img):
_, _, red = cv2.split(img)
print('nation')
red = cv2.UMat(red)
red = hist_equal(red)
red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
red = img_resize(red, 150)
# cv2.imwrite('nation.png', red)
# img = Image.fromarray(cv2.UMat.get(red).astype('uint8'))
#return get_result_fix_length(red, 1, 'nation', '-psm 10')
return get_result_fix_length(red, 1, 'chi_sim', '--psm 10')
# return pytesseract.image_to_string(img, lang='nation', config='-psm 13').replace(" ","")
# def get_birth(year, month, day):
# _, _, red = cv2.split(year)
# red = cv2.UMat(red)
# red = hist_equal(red)
# red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
# red = img_resize(red, 150)
# # cv2.imwrite('year_red.png', red)
# year_red = red
#
# _, _, red = cv2.split(month)
# red = cv2.UMat(red)
# red = hist_equal(red)
# red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
# #red = cv2.erode(red,kernel,iterations = 1)
# red = img_resize(red, 150)
# # cv2.imwrite('month_red.png', red)
# month_red = red
#
# _, _, red = cv2.split(day)
# red = cv2.UMat(red)
# red = hist_equal(red)
# red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
# red = img_resize(red, 150)
# # cv2.imwrite('day_red.png', red)
# day_red = red
# # return pytesseract.image_to_string(img, lang='birth', config='-psm 7')
# return get_result_fix_length(year_red, 4, 'eng', '-c tessedit_char_whitelist=0123456789 -psm 13'), \
# get_result_vary_length(month_red, 'eng', '-c tessedit_char_whitelist=0123456789 -psm 13'), \
# get_result_vary_length(day_red, 'eng', '-c tessedit_char_whitelist=0123456789 -psm 13')
def get_address(img):
#_, _, red = cv2.split(img)
#red = cv2.medianBlur(red, 3)
print('address')
_, _, red = cv2.split(img)
red = cv2.UMat(red)
red = hist_equal(red)
red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
red = img_resize(red, 300)
#img = img_resize(img, 300)
#cv2.imwrite('address_red.png', red)
img = Image.fromarray(cv2.UMat.get(red).astype('uint8'))
#return punc_filter(get_result_vary_length(red,'chi_sim', img, '-psm 6'))
return punc_filter(get_result_vary_length(red, 'chi_sim', img, '--psm 6'))
#return punc_filter(pytesseract.image_to_string(img, lang='chi_sim', config='-psm 3').replace(" ",""))
def get_idnum_and_birth(img):
_, _, red = cv2.split(img)
print('idnum')
red = cv2.UMat(red)
red = hist_equal(red)
red = cv2.adaptiveThreshold(red, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 151, 50)
red = img_resize(red, 150)
# cv2.imwrite('idnum_red.png', red)
#idnum_str = get_result_fix_length(red, 18, 'idnum', '-psm 8')
# idnum_str = get_result_fix_length(red, 18, 'eng', '--psm 8 ')
img = Image.fromarray(cv2.UMat.get(red).astype('uint8'))
idnum_str = get_result_vary_length(red, 'eng', img, '--psm 8 ')
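    # Digits 7-14 of the 18-digit ID number encode the birth date as YYYYMMDD.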
return idnum_str, idnum_str[6:14]
def get_result_fix_length(red, fix_length, langset, custom_config=''):
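    """OCR a region expected to contain exactly fix_length characters.

    Binarize, find character contours, then adjust a height threshold until exactly
    fix_length boxes remain (giving up after 50 attempts), and OCR each box left to right."""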
red_org = red
cv2.fastNlMeansDenoising(red, red, 4, 7, 35)
rec, red = cv2.threshold(red, 127, 255, cv2.THRESH_BINARY_INV)
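    # OpenCV 3.x returns (image, contours, hierarchy); OpenCV 4.x returns only (contours, hierarchy).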
image, contours, hierarchy = cv2.findContours(red, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours))
    # Drawing the contours once helps reduce noise
cv2.drawContours(red, contours, -1, (0, 255, 0), 1)
color_img = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)
# for x, y, w, h in contours:
# imgrect = cv2.rectangle(color_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
# showimg(imgrect)
h_threshold = 54
numset_contours = []
calcu_cnt = 1
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h > h_threshold:
numset_contours.append((x, y, w, h))
while len(numset_contours) != fix_length:
if calcu_cnt > 50:
print(u'计算次数过多!目前阈值为:', h_threshold)
break
numset_contours = []
calcu_cnt += 1
if len(numset_contours) > fix_length:
h_threshold += 1
contours_cnt = 0
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h > h_threshold:
contours_cnt += 1
numset_contours.append((x, y, w, h))
if len(numset_contours) < fix_length:
h_threshold -= 1
contours_cnt = 0
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h > h_threshold:
contours_cnt += 1
numset_contours.append((x, y, w, h))
result_string = ''
numset_contours.sort(key=lambda num: num[0])
for x, y, w, h in numset_contours:
result_string += pytesseract.image_to_string(cv2.UMat.get(red_org)[y-10:y + h + 10, x-10:x + w + 10], lang=langset, config=custom_config)
# print(new_r)
# cv2.imwrite('fixlengthred.png', cv2.UMat.get(red_org)[y-10:y + h +10 , x-10:x + w + 10])
print(result_string)
return result_string
def get_result_vary_length(red, langset, org_img, custom_config=''):
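    """OCR a region with a variable number of characters.

    Binarize, keep contours that are tall enough and not too wide (likely characters),
    take one bounding box around all of them, and OCR that crop in a single pass."""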
red_org = red
# cv2.fastNlMeansDenoising(red, red, 4, 7, 35)
rec, red = cv2.threshold(red, 127, 255, cv2.THRESH_BINARY_INV)
image, contours, hierarchy = cv2.findContours(red, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)
# print(len(contours))
    # Drawing the contours once helps reduce noise
cv2.drawContours(red, contours, -1, (255, 255, 255), 1)
color_img = cv2.cvtColor(red, cv2.COLOR_GRAY2BGR)
numset_contours = []
height_list=[]
width_list=[]
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
height_list.append(h)
# print(h,w)
width_list.append(w)
height_list.remove(max(height_list))
width_list.remove(max(width_list))
height_threshold = 0.70*max(height_list)
width_threshold = 1.4 * max(width_list)
# print('height_threshold:'+str(height_threshold)+'width_threshold:'+str(width_threshold))
big_rect=[]
for cnt in contours:
x, y, w, h = cv2.boundingRect(cnt)
if h > height_threshold and w < width_threshold:
# print(h,w)
numset_contours.append((x, y, w, h))
big_rect.append((x, y))
big_rect.append((x + w, y + h))
big_rect_nparray = np.array(big_rect, ndmin=3)
x, y, w, h = cv2.boundingRect(big_rect_nparray)
# imgrect = cv2.rectangle(color_img, (x, y), (x + w, y + h), (0, 255, 0), 2)
# showimg(imgrect)
# showimg(cv2.UMat.get(org_img)[y:y + h, x:x + w])
result_string = ''
result_string += pytesseract.image_to_string(cv2.UMat.get(red_org)[y-10:y + h + 10, x-10:x + w + 10], lang=langset,
config=custom_config)
print(result_string)
# cv2.imwrite('varylength.png', cv2.UMat.get(org_img)[y:y + h, x:x + w])
# cv2.imwrite('varylengthred.png', cv2.UMat.get(red_org)[y:y + h, x:x + w])
# numset_contours.sort(key=lambda num: num[0])
# for x, y, w, h in numset_contours:
# result_string += pytesseract.image_to_string(cv2.UMat.get(color_img)[y:y + h, x:x + w], lang=langset, config=custom_config)
return punc_filter(result_string)
def punc_filter(text):
    # Keep only CJK characters, digits and uppercase letters; drop punctuation and whitespace.
    temp = text
xx = u"([\u4e00-\u9fff0-9A-Z]+)"
pattern = re.compile(xx)
results = pattern.findall(temp)
string = ""
for result in results:
string += result
return string
# This uses histogram stretching, not histogram equalization
def hist_equal(img):
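    """Stretch the grey-level histogram of a UMat image to the full 0-255 range via a
    lookup table (contrast stretching, not true histogram equalization)."""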
# clahe_size = 8
# clahe = cv2.createCLAHE(clipLimit=1.0, tileGridSize=(clahe_size, clahe_size))
# result = clahe.apply(img)
#test
#result = cv2.equalizeHist(img)
image = img.get() #UMat to Mat
# result = cv2.equalizeHist(image)
    lut = np.zeros(256, dtype=image.dtype)  # create an empty lookup table
#lut = np.zeros(256)
    hist = cv2.calcHist([image],   # compute the image histogram
                        [0],       # channel to use
                        None,      # no mask applied
                        [256],     # 256 bins (1D histogram)
                        [0, 256])  # pixel value range
minBinNo, maxBinNo = 0, 255
    # find the first non-zero histogram bin from the left
for binNo, binValue in enumerate(hist):
if binValue != 0:
minBinNo = binNo
break
    # find the first non-zero histogram bin from the right
for binNo, binValue in enumerate(reversed(hist)):
if binValue != 0:
maxBinNo = 255-binNo
break
#print minBinNo, maxBinNo
    # build the lookup table
for i,v in enumerate(lut):
if i < minBinNo:
lut[i] = 0
elif i > maxBinNo:
lut[i] = 255
else:
lut[i] = int(255.0*(i-minBinNo)/(maxBinNo-minBinNo)+0.5)
    # apply the mapping with cv2.LUT (image: input image, lut: lookup table)
#print lut
result = cv2.LUT(image, lut)
#print type(result)
#showimg(result)
return cv2.UMat(result)
if __name__=="__main__":
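    # Example run: expects the resized templates ('*_mask_1280.jpg') alongside this
    # script and a sample card image under testimages/.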
idocr = idcardocr(cv2.UMat(cv2.imread('testimages/zrh.jpg')))
print(idocr)
# for i in range(15):
# idocr = idcardocr(cv2.UMat(cv2.imread('testimages/%s.jpg'%(i+1))))
# print(idocr['idnum'])