加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
该仓库未声明开源许可证文件(LICENSE),使用请关注具体项目描述及其代码上游依赖。
克隆/下载
server.py 37.75 KB
一键复制 编辑 原始数据 按行查看 历史

from flask import Flask, request
from ResumeParsing import ResumeParsing
import json
import re
from bs4 import BeautifulSoup
def txt_wrap_by(start_str, end, html):
    """Return the text of ``html`` between ``start_str`` and ``end``.

    The search for ``end`` begins right after the first occurrence of
    ``start_str``; neither marker is included in the result, and surrounding
    whitespace is stripped.

    Args:
        start_str: marker that precedes the wanted text.
        end: marker that follows the wanted text.
        html: the string to search.

    Returns:
        The stripped text between the markers, or ``None`` when either
        marker cannot be found.
    """
    start_pos = html.find(start_str)
    if start_pos < 0:
        return None
    start_pos += len(start_str)
    # Use a separate name instead of rebinding the ``end`` parameter.
    end_pos = html.find(end, start_pos)
    if end_pos < 0:
        return None
    return html[start_pos:end_pos].strip()
def ganjiParser(htmlfile):
"""
Parse a plain-text resume (e.g. a docx converted to txt) into a flat field list.

NOTE(review): the indentation of this function is missing in this copy of the
file, so the nesting of the statements below cannot be read off the text and
has to be restored from the upstream file before the code can run.

The first non-empty lines decide which of four layouts is parsed: a Ganji
resume ("赶集网" in the first line), a Zhilian Zhuopin resume, a 51job resume,
or a customer-supplied fallback layout.

Args:
    htmlfile: iterable of text lines (str); all whitespace is removed from
        each line before it is inspected.

Returns:
    list of 18 strings in the order time, name, sex, age, tel, mail, edu,
    wed, place, school, master, work, city, salary, exp, company,
    work-experience JSON array text, education JSON array text. Whitespace
    and '|' characters are removed from every entry and None becomes ''.
"""
print('进入txt格式简历解析')
# Scalar result fields; each stays '' unless the selected layout fills it in.
name = ''
sex = ''
age = ''
edu = ''
tel = ''
mail = ''
exp = ''
place = ''
city = ''
wed = ''
school = ''
salary = ''
master = ''
work = ''
time = ''
company = ''
filetxt = []
# Work and education histories are built up by hand as JSON array text.
workexplist='['
eduexplist = '['
# worktag/edutag are set to 1 while the scan is inside the work/education
# section and to -1 otherwise.
worktag = -1
edutag = -1
workexptxt = []
eduexptxt = []
edunumber = 0
worknumber = 0
# Strip every whitespace character from each line and keep the non-empty ones.
for byteline in htmlfile:
# line =bytes.decode(byteline)
byteline = re.sub('\s', '', byteline)
if (byteline != ''):
filetxt.append(byteline)
# NOTE(review): filetxt[0] is read without a length check, so input with no
# non-empty line raises IndexError here.
print(filetxt[0])
# Layout 1: Ganji resume; the basic fields are read from fixed line positions.
if("赶集网"in filetxt[0]):
if re.findall('\d{4}/\d{2}/\d{2}', filetxt[0]) != []:
time = re.findall('\d{4}/\d{2}/\d{2}', filetxt[0])[0]
try:
name = filetxt[1].split("(")[0]
except Exception:
print("###########提取姓名错误#############")
try:
ageSex = txt_wrap_by("(", ")", filetxt[1])
age = ageSex.split(",")[1]
sex = ageSex.split(",")[0]
except Exception:
# NOTE(review): this message repeats the name-extraction text although the
# guarded statements extract age and sex.
print("###########提取姓名错误#############")
try:
edu = filetxt[2].split("最高学历:")[1]
except Exception:
print("###########提取学历错误#############")
try:
mail = filetxt[5].split("邮箱:")[1]
except Exception:
print("###########提取邮箱错误#############")
try:
# NOTE(review): the value is stored in `palace`, which is never read again in
# this function, so `place` stays '' in this layout. Looks like a typo for
# `place` - confirm.
palace = filetxt[4].split("籍贯:")[1]
except Exception:
print("###########提取palace错误#############")
try:
salary = txt_wrap_by("期望月薪:", "工作年限:", filetxt[3])
exp = txt_wrap_by("工作年限:", "年", filetxt[3])
city = txt_wrap_by("期望地区:", "籍贯:", filetxt[4])
tel = txt_wrap_by("联系电话:", "邮箱:", filetxt[5])
except Exception:
print("###########提取期望月薪等错误#############")
try:
# From line index 6 on: collect the work-experience lines and the single line
# that follows an education heading.
for i in range(6, len(filetxt)):
# print(filetxt[i])
if "工作经验" in str(filetxt[i]):
worktag = 1
elif "教育经历" in str(filetxt[i]):
# NOTE(review): filetxt[i + 1] raises IndexError when the heading is the last
# line; presumably left to the enclosing try - confirm.
eduexptxt.append(filetxt[i + 1])
if filetxt[i] in ['项目经历', '教育经历']:
worktag = -1
if (worktag == 1 and filetxt[i] != ''):
workexptxt.append(filetxt[i])
try:
# The number of education entries is the number of "YYYY/M至" date starts found
# in the first collected education line.
edunumber = len(re.findall('\d{4}/\d{1}至',eduexptxt[0]))
for i in range(0, edunumber):
try:
eduStartTime = ''
eduEndTime = ''
eduSchool = ''
eduMaster = ''
eduTemplate = {
"eduStart": "",
"eduEnd": "",
"education": "",
"eduSchool": "",
"eduMaster": ""}
# The education line is treated as '|'-separated columns.
edulist = eduexptxt[0].split("|")
if(i==0):
master = edulist[len(edulist) - 1]
eduTime = edulist[0]
eduStartTime = eduTime.split('至')[0]
eduEndTime = eduTime.split('至')[1]
eduSchool = edulist[1]
eduMaster = master
education = edulist[3]
eduTemplate['eduStart'] = eduStartTime
eduTemplate['eduEnd'] = eduEndTime
eduTemplate['education'] = education
eduTemplate['eduSchool'] = eduSchool
eduTemplate['eduMaster'] = eduMaster
# A comma separator is appended after every entry except the last one.
if (i == edunumber - 1):
eduexplist = eduexplist + json.dumps(eduTemplate)
else:
eduexplist = eduexplist + json.dumps(eduTemplate) + ','
except Exception:
continue
except Exception:
print("###########ganji教育经历#############")
try:
# presumably the collected work lines are one heading followed by two lines per
# job (company line, then details line) - TODO confirm against sample input.
worknumber=int((len(workexptxt)-1)/2)
for i in range(0,worknumber):
try:
workTitle = ''
workCompany = ''
startTime = ''
endTime = ''
workContent = ''
workTemplate = {
"startTime": "#STARTTIME#",
"endTime": "#ENDTIME#",
"workCompany": "#WORKCOMPANY#",
"workTitle": "#WORKTITLE#",
"workContent": "#WORKCONTENT#"}
workTime =txt_wrap_by("工作时间:", "职位名称:", workexptxt[i*2 + 2])
startTime = re.sub('\s', '', workTime.split('至')[0])
endTime = re.sub('\s', '', workTime.split('至')[1])
workCompany = workexptxt[i*2 + 1].split('(')[0]
workTitle = txt_wrap_by("职位名称:", "公司行业:", workexptxt[i*2 + 2])
workContent = workexptxt[i*2 + 2].split('工作内容:')[1]
workTemplate['startTime'] = startTime
workTemplate['endTime'] = endTime
workTemplate['workTitle'] = workTitle
workTemplate['workCompany'] = workCompany
workTemplate['workContent'] = workContent
# The first job also supplies the top-level company and position fields.
if(i==0):
company = workCompany
work = workTitle
if (i == worknumber-1):
workexplist = workexplist + json.dumps(workTemplate)
else:
workexplist = workexplist + json.dumps(workTemplate) + ','
except Exception:
continue
except Exception:
print("###########ganji工作经历#############")
except Exception:
print("###########提取工作经验等错误#############")
elif("历ID:"in filetxt[0] or ("姓名:"in filetxt[0] and ("|"in filetxt[0]) )):
# Layout 2: Zhilian Zhuopin resume.
# First pass over all lines: fill the scalar fields and collect the lines of
# the work and education sections.
for i in range(0, len(filetxt)):
# print(filetxt[i])
try:
if ('姓名:' in filetxt[i]):
namestr = re.sub('姓名:', '', filetxt[i])
namelist = namestr.split('|')
name = namelist[0]
for each in namelist:
if each in ['男', '女']:
sex = each
if ('岁' in each) and re.findall('\d{2}岁', each) != []:
age = re.findall('\d{2}岁', each)[0]
if '年工作经验' in each or '无工作经验' in each:
exp = each
if ('岁' in filetxt[i]) and re.findall('\d{2}岁', filetxt[i]) != []:
basiclist = filetxt[i].split('|')
place = basiclist[len(basiclist) - 1]
for each in basiclist:
if ('岁' in each) and re.findall('\d{2}岁', each) != []:
age = re.findall('\d{2}岁', each)[0]
if each in ['已婚', '未婚', '离异', '丧偶', '保密', 'Married', 'Single']:
wed = each
if ('更新时间:' in filetxt[i] and re.findall('\d{4}-\d{2}-\d{2}', filetxt[i]) != []):
time = re.findall('\d{4}-\d{2}-\d{2}', filetxt[i])[0]
# Any run of '1' followed by ten digits is taken as the phone number.
if re.findall('1\d{10}', filetxt[i]) != []:
tel = re.findall('1\d{10}', filetxt[i])[0]
if ('职位:' in filetxt[i]):
worklist = filetxt[i].split(':')
work = worklist[1]
if ('手机:' in filetxt[i]):
tellist = filetxt[i].split(':')
tel = tellist[1]
if ('邮箱:' in filetxt[i]):
maillist = filetxt[i].split(':')
mail = maillist[1]
if ('婚姻状况:' in filetxt[i]):
wedlist = filetxt[i].split(':')
wed = wedlist[1]
if ('期望薪资' in filetxt[i]):
if ('元/月' in filetxt[i]):
salarylist = filetxt[i].split(':')
salary = salarylist[1]
else:
salary = filetxt[i + 1]
if ('地点:' in filetxt[i]):
citylist = filetxt[i].split(':')
city = citylist[1]
if (city == ''):
city = filetxt[i + 1]
# Section tracking: the headings switch worktag/edutag on and off.
if "工作经验" == str(filetxt[i]):
worktag = 1
elif "教育背景" == str(filetxt[i]):
edutag = 1
if filetxt[i] in ['项目经验', '教育背景']:
worktag = -1
if filetxt[i] in ['培训经历', '其他能力']:
edutag = -1
if (worktag == 1 and filetxt[i] != ''):
workexptxt.append(filetxt[i])
# A line containing a "YYYY年M月--YYYY年M月" (or "--至今") range counts as one entry.
if(re.findall('((\d{4}年\d{1}月)|(\d{4}年\d{2}月))--(至今|(\d{4}年\d{1}月)|(\d{4}年\d{2}月))', filetxt[i])!=[]):
worknumber = worknumber+1
if (edutag == 1 and filetxt[i] != ''):
eduexptxt.append(filetxt[i])
if (re.findall('((\d{4}年\d{1}月)|(\d{4}年\d{2}月))--(至今|(\d{4}年\d{1}月)|(\d{4}年\d{2}月))',filetxt[i])!= []):
edunumber = edunumber + 1
except Exception:
continue
try:
# Second pass: build one education entry per date-range line; k counts the
# entries found so far.
k = 0
for i in range(0, len(eduexptxt)):
try:
eduStartTime = ''
eduEndTime = ''
eduSchool = ''
eduMaster = ''
education = ''
eduTemplate = {
"eduStart": "",
"eduEnd": "",
"education": "",
"eduSchool": "",
"eduMaster": ""}
eduTime=re.findall('((\d{4}年\d{1}月)|(\d{4}年\d{2}月))--(至今|(\d{4}年\d{1}月)|(\d{4}年\d{2}月))',eduexptxt[i])
if( eduTime!= []):
k = k + 1
# Group 1 of the pattern is the start date and group 4 the end date.
eduStartTime = eduTime[0][0]
eduEndTime = eduTime[0][3]
# The school name is what remains of the line once the date range is removed.
eduSchool = re.sub(eduStartTime+'--'+eduEndTime,'',eduexptxt[i])
if('专业名称:' in eduexptxt[i+1]):
eduMaster = eduexptxt[i + 1].split(':')[1]
if('学历/学位:' in eduexptxt[i+2] and "全日制统招:"in eduexptxt[i+2] ):
education=txt_wrap_by('学历/学位:', '全日制统招:', eduexptxt[i+2])
eduTemplate['eduStart'] = eduStartTime
eduTemplate['eduEnd'] = eduEndTime
eduTemplate['education'] = education
eduTemplate['eduSchool'] = eduSchool
eduTemplate['eduMaster'] = eduMaster
# The first education entry also supplies the top-level edu/school/master fields.
if (k == 1):
edu = education
school = eduSchool
master = eduMaster
# NOTE(review): the trailing comma is skipped only when k equals the count from
# the first pass; if an entry is dropped by the except below, the array text
# may not be valid JSON - confirm how callers handle that.
if (k == edunumber):
eduexplist = eduexplist + json.dumps(eduTemplate)
else:
eduexplist = eduexplist + json.dumps(eduTemplate) + ','
except Exception:
continue
except Exception:
print("###########zhuopin教育经历#############")
try:
# Second pass for work history, same scheme as for education.
k = 0
for i in range(0, len(workexptxt)):
try:
workTitle = ''
workCompany = ''
startTime = ''
endTime = ''
workContent = ''
workTemplate = {
"startTime": "#STARTTIME#",
"endTime": "#ENDTIME#",
"workCompany": "#WORKCOMPANY#",
"workTitle": "#WORKTITLE#",
"workContent": "#WORKCONTENT#"}
workTime = re.findall('((\d{4}年\d{1}月)|(\d{4}年\d{2}月))--(至今|(\d{4}年\d{1}月)|(\d{4}年\d{2}月))', workexptxt[i])
if(workTime != []):
k = k + 1
startTime = workTime[0][0]
endTime = workTime[0][3]
workCompany = txt_wrap_by(endTime, "|", workexptxt[i])
workTitle = txt_wrap_by("|", "(", workexptxt[i])
# Look ahead for the duty description and append the lines that follow it until
# the next date-range line or a summary line.
for m in range(i,len(workexptxt)):
if("职责描述:"in workexptxt[m]):
# NOTE(review): split() returns a list here and, unlike the 51job branch, it is
# not indexed with [1]; adding a str to it below would raise TypeError.
# Looks like a missing [1] - confirm.
workContent=workexptxt[m].split(':')
for t in range(m+1,len(workexptxt)):
if(re.findall('((\d{4}年\d{1}月)|(\d{4}年\d{2}月))--(至今|(\d{4}年\d{1}月)|(\d{4}年\d{2}月))', workexptxt[t])!=[])or '总结:'in workexptxt[t]:
break
workContent = workContent+workexptxt[t]
break
workTemplate['startTime'] = startTime
workTemplate['endTime'] = endTime
workTemplate['workTitle'] = workTitle
workTemplate['workCompany'] = workCompany
workTemplate['workContent'] = workContent
if (k == 1):
work = workTitle
company = workCompany
if (k == worknumber):
workexplist = workexplist + json.dumps(workTemplate)
else:
workexplist = workexplist + json.dumps(workTemplate) + ','
except Exception:
continue
except Exception:
print("###########zhuopin工作经历#############")
# NOTE(review): filetxt[3] is evaluated whenever the first test is false, so
# input with fewer than four non-empty lines raises IndexError here.
elif ("更新时间:" in filetxt[0] or "更新时间:" in filetxt[3]):
# Layout 3: 51job-format resume.
for i in range(0, len(filetxt)):
# print(filetxt[i])
try:
if ('ID:' in filetxt[i]):
namelist = filetxt[i].split('ID:')
name = namelist[0]
if ('更新时间:' in filetxt[i] and re.findall('\d{4}-\d{2}-\d{2}', filetxt[i]) != []):
time = re.findall('\d{4}-\d{2}-\d{2}', filetxt[i])[0]
if re.findall('1\d{10}', filetxt[i]) != []:
tel = re.findall('1\d{10}', filetxt[i])[0]
if ('职位:' in filetxt[i] and work==''):
worklist = filetxt[i].split(':')
work = worklist[1]
# A line matching '@.*com' is stored whole as the e-mail address; the line after
# it is split on '|' for sex, age, residence and experience.
if re.findall('@.*com', filetxt[i]) != []:
mail = filetxt[i]
basiclist = filetxt[i + 1].split('|')
for each in basiclist:
if each in ['男', '女']:
sex = each
if ('岁' in each) and re.findall('\d{2}岁', each) != []:
age = re.findall('\d{2}岁', each)[0]
if '现居住' in each:
place = re.sub('现居住', '', each)
if '年工作经验' in each or '无工作经验' in each:
exp = each
# The `== ''` guards below keep the first occurrence of each field.
if ('专业:' in filetxt[i] and master==''):
masterlist = filetxt[i].split(':')
master = masterlist[1]
if ('学校:' in filetxt[i] and school==''):
schoollist = filetxt[i].split(':')
school = schoollist[1]
if ('学历/学位:' in filetxt[i] and edu==''):
edulist = filetxt[i].split(':')
edu = edulist[1]
if ('婚姻状况:' in filetxt[i]):
wedlist = filetxt[i].split(':')
wed = wedlist[1]
if ('期望薪资:' in filetxt[i]):
salarylist = filetxt[i].split(':')
salary = salarylist[1]
if ('地点:' in filetxt[i] and city==''):
citylist = filetxt[i].split(':')
city = citylist[1]
if ('公司:' in filetxt[i] and company==''):
companylist = filetxt[i].split(':')
company = companylist[1]
if "工作经验" == str(filetxt[i]):
worktag = 1
elif "教育经历" == str(filetxt[i]):
edutag = 1
if filetxt[i] in ['项目经验', '教育经历']:
worktag = -1
if filetxt[i] in ['在校情况', '校内荣誉']:
edutag = -1
if (worktag == 1 and filetxt[i] != ''):
workexptxt.append(filetxt[i])
# In this layout date ranges look like "YYYY/M-YYYY/M" or "YYYY/M-至今".
if(re.findall('((\d{4}/\d{1})|(\d{4}/\d{2}))-(至今|(\d{4}/\d{1})|(\d{4}/\d{2}))', filetxt[i])!=[]):
worknumber = worknumber+1
if (edutag == 1 and filetxt[i] != ''):
eduexptxt.append(filetxt[i])
if (re.findall('((\d{4}/\d{1})|(\d{4}/\d{2}))-(至今|(\d{4}/\d{1})|(\d{4}/\d{2}))',filetxt[i]) != []):
edunumber = edunumber + 1
except Exception:
continue
try:
k = 0
for i in range(0, len(eduexptxt)):
try:
# NOTE(review): unlike the Zhuopin branch, `education` is not reset to '' here,
# so it is either unbound or carried over from an earlier entry when the next
# line has no '|' - confirm intended behaviour.
eduStartTime = ''
eduEndTime = ''
eduSchool = ''
eduMaster = ''
eduTemplate = {
"eduStart": "",
"eduEnd": "",
"education": "",
"eduSchool": "",
"eduMaster": ""}
eduTime=re.findall('((\d{4}/\d{1})|(\d{4}/\d{2}))-(至今|(\d{4}/\d{1})|(\d{4}/\d{2}))',eduexptxt[i])
if( eduTime!= []):
k = k + 1
eduStartTime = eduTime[0][0]
eduEndTime = eduTime[0][3]
eduSchool = re.sub(eduStartTime+'-'+eduEndTime,'',eduexptxt[i])
# The following line holds "degree|major".
if('|' in eduexptxt[i+1]):
education=eduexptxt[i+1].split('|')[0]
eduMaster=eduexptxt[i+1].split('|')[1]
eduTemplate['eduStart'] = eduStartTime
eduTemplate['eduEnd'] = eduEndTime
eduTemplate['education'] = education
eduTemplate['eduSchool'] = eduSchool
eduTemplate['eduMaster'] = eduMaster
if (k == edunumber):
eduexplist = eduexplist + json.dumps(eduTemplate)
else:
eduexplist = eduexplist + json.dumps(eduTemplate) + ','
except Exception:
continue
except Exception:
print("###########51job教育经历#############")
try:
k = 0
for i in range(0, len(workexptxt)):
try:
workTitle = ''
workCompany = ''
startTime = ''
endTime = ''
workContent = ''
workTemplate = {
"startTime": "#STARTTIME#",
"endTime": "#ENDTIME#",
"workCompany": "#WORKCOMPANY#",
"workTitle": "#WORKTITLE#",
"workContent": "#WORKCONTENT#"}
workTime = re.findall('((\d{4}/\d{1})|(\d{4}/\d{2}))-(至今|(\d{4}/\d{1})|(\d{4}/\d{2}))', workexptxt[i])
if(workTime != []):
k = k + 1
startTime = workTime[0][0]
endTime = workTime[0][3]
workCompany = re.sub(startTime+'-'+endTime,'',workexptxt[i])
# The job title is the line just before the job-description line; the
# description continues until the next date range or a subordinates line.
for m in range(i,len(workexptxt)):
if ("工作描述:" in workexptxt[m]):
workTitle = workexptxt[m - 1]
workContent = workexptxt[m].split(':')[1]
for t in range(m + 1, len(workexptxt)):
if (re.findall('((\d{4}/\d{1})|(\d{4}/\d{2}))-(至今|(\d{4}/\d{1})|(\d{4}/\d{2}))',workexptxt[t]) != []) or '下属:' in workexptxt[t]:
break
workContent = workContent + workexptxt[t]
break
workTemplate['startTime'] = startTime
workTemplate['endTime'] = endTime
workTemplate['workTitle'] = workTitle
workTemplate['workCompany'] = workCompany
workTemplate['workContent'] = workContent
if (k == worknumber):
workexplist = workexplist + json.dumps(workTemplate)
else:
workexplist = workexplist + json.dumps(workTemplate) + ','
except Exception:
continue
except Exception:
print("###########51job工作经历#############")
else:
# Layout 4: fallback, a docx layout supplied by the customer, where one line
# carries several "label:value" pairs.
print("客户提供的一种docx格式简历")
for i in range(0, len(filetxt)):
try:
if "姓名:" in str(filetxt[i]):
name = txt_wrap_by("姓名:", "性别:", filetxt[i])
sexAge = filetxt[i].split("性别:")
if (len(sexAge) == 2):
sex = sexAge[1]
if "工作年限:" in str(filetxt[i]):
exp = txt_wrap_by("工作年限:", "年龄:", filetxt[i])
agelist = filetxt[i].split("年龄:")
if (len(agelist) == 2):
age = agelist[1]
if "学历:" in str(filetxt[i]) and edu == '':
edulist = filetxt[i].split("学历:")
if (len(edulist) == 2):
edu = edulist[1]
if "婚姻状况:" in str(filetxt[i]) and wed == '':
wedlist = filetxt[i].split("婚姻状况:")
if (len(wedlist) == 2):
wed = wedlist[1]
elif "所在地:" in str(filetxt[i]):
palcelist = filetxt[i].split(":")
if (len(palcelist) == 2):
place = palcelist[1]
elif "期望地点:" in str(filetxt[i]):
citylist = filetxt[i].split(":")
if (len(citylist) == 2):
city = citylist[1]
if "电子邮件:" in str(filetxt[i]) and "学历:" in str(filetxt[i]):
mail = txt_wrap_by("电子邮件:", "学历:", filetxt[i])
if "联系电话:" in str(filetxt[i]) and "婚姻状况:" in str(filetxt[i]):
tel = txt_wrap_by("联系电话:", "婚姻状况:", filetxt[i])
# Remove the "verified / report" link text that follows the number.
tel = re.sub('已验证举报', '', tel)
if "期望月薪:" in str(filetxt[i]):
salarylist = filetxt[i].split(":")
if (len(salarylist) == 2):
salary = salarylist[1]
if "教育经历" in str(filetxt[i]) and school == '':
school = filetxt[i + 1].split("(")[0]
if "专业:" in str(filetxt[i]) and "学历:" in str(filetxt[i]):
master = txt_wrap_by("专业:", "学历:", filetxt[i])
if "所任职位:" in str(filetxt[i]) and "目前薪资:" in str(filetxt[i]):
work = txt_wrap_by("所任职位:", "目前薪资:", filetxt[i])
if "公司名称:" in str(filetxt[i]) and company == '':
company = filetxt[i].split("公司名称:")[1]
if "工作经历" == str(filetxt[i]):
worktag = 1
elif "教育经历" == str(filetxt[i]):
edutag = 1
if filetxt[i] in ['项目经历', '教育经历']:
worktag = -1
if filetxt[i] in ['语言能力', '自我评价', '附加消息']:
edutag = -1
if (worktag == 1 and filetxt[i] != ''):
workexptxt.append(filetxt[i])
# NOTE(review): the '.' between year and month is unescaped in the patterns of
# this branch, so it matches any single character, not only a literal dot.
if(re.findall('((\d{4}.\d{1})|(\d{4}.\d{2}))-(至今|(\d{4}.\d{1})|(\d{4}.\d{2}))', filetxt[i])!=[]):
worknumber = worknumber+1
if (edutag == 1 and filetxt[i] != ''):
eduexptxt.append(filetxt[i])
if (re.findall('((\d{4}.\d{1})|(\d{4}.\d{2}))-(至今|(\d{4}.\d{1})|(\d{4}.\d{2}))',filetxt[i]) != []):
edunumber = edunumber + 1
except Exception:
continue
try:
k = 0
for i in range(0, len(eduexptxt)):
try:
# NOTE(review): `education` is not reset to '' per iteration here either.
eduStartTime = ''
eduEndTime = ''
eduSchool = ''
eduMaster = ''
eduTemplate = {
"eduStart": "",
"eduEnd": "",
"education": "",
"eduSchool": "",
"eduMaster": ""}
eduTime=re.findall('((\d{4}.\d{1})|(\d{4}.\d{2}))-(至今|(\d{4}.\d{1})|(\d{4}.\d{2}))',eduexptxt[i])
if( eduTime!= []):
k = k + 1
eduStartTime = eduTime[0][0]
eduEndTime = eduTime[0][3]
eduSchool = eduexptxt[i].split('(')[0]
# NOTE(review): `education` receives the text between the major label and the
# degree label, and `eduMaster` the text after the degree label. That is the
# reverse of how `master` is extracted from the same labels in the first pass
# of this branch - the two look swapped; confirm.
if('专业:' in eduexptxt[i+1] and '学历:' in eduexptxt[i+1] ):
education=txt_wrap_by("专业:", "学历:", eduexptxt[i+1])
if('是否统招:' in eduexptxt[i+1]):
eduMaster=txt_wrap_by("学历:", "是否统招:", eduexptxt[i+1])
eduTemplate['eduStart'] = eduStartTime
eduTemplate['eduEnd'] = eduEndTime
eduTemplate['education'] = education
eduTemplate['eduSchool'] = eduSchool
eduTemplate['eduMaster'] = eduMaster
if (k == edunumber):
eduexplist = eduexplist + json.dumps(eduTemplate)
else:
eduexplist = eduexplist + json.dumps(eduTemplate) + ','
except Exception:
continue
except Exception:
print("###########yonghu教育经历#############")
try:
k = 0
for i in range(0, len(workexptxt)):
try:
workTitle = ''
workCompany = ''
startTime = ''
endTime = ''
workContent = ''
workTemplate = {
"startTime": "#STARTTIME#",
"endTime": "#ENDTIME#",
"workCompany": "#WORKCOMPANY#",
"workTitle": "#WORKTITLE#",
"workContent": "#WORKCONTENT#"}
workTime = re.findall('((\d{4}.\d{1})|(\d{4}.\d{2}))-(至今|(\d{4}.\d{1})|(\d{4}.\d{2}))', workexptxt[i])
if(workTime != []):
k = k + 1
startTime = workTime[0][0]
endTime = workTime[0][3]
workCompany =txt_wrap_by(endTime,'(',workexptxt[i])
for m in range(i,len(workexptxt)):
# The title is taken from the first salary line with digits and the
# "元/月" unit removed.
if("元/月"in workexptxt[m] and workTitle==''):
workTitle = re.sub('\d','',workexptxt[m])
workTitle = re.sub('元/月','',workTitle)
if ("工作职责和业绩:" in workexptxt[m]):
workContent = workexptxt[m].split(':')[1]
# The description continues until the next date range or the next labelled line.
for t in range(m+1, len(workexptxt)):
if (re.findall('((\d{4}.\d{1})|(\d{4}.\d{2}))-(至今|(\d{4}.\d{1})|(\d{4}.\d{2}))', workexptxt[t]) != []) or ':' in workexptxt[t]:
break
workContent = workContent + workexptxt[t]
break
workTemplate['startTime'] = startTime
workTemplate['endTime'] = endTime
workTemplate['workTitle'] = workTitle
workTemplate['workCompany'] = workCompany
workTemplate['workContent'] = workContent
if (k == worknumber):
workexplist = workexplist + json.dumps(workTemplate)
else:
workexplist = workexplist + json.dumps(workTemplate) + ','
except Exception:
continue
except Exception:
print("###########yonghu工作经历#############")
# Close the two hand-built JSON array strings.
workexplist = workexplist + ']'
eduexplist = eduexplist + ']'
print(workexplist)
print(eduexplist)
# Fixed output order; callers address the fields by index.
people1 = [time, name, sex, age, tel, mail, edu, wed, place, school, master, work, city, salary, exp,company,workexplist,eduexplist]
people = []
# Normalise every field: None (e.g. from txt_wrap_by finding no match) becomes
# '', and newlines, other whitespace and '|' are removed - this also applies to
# the two JSON array strings.
for each in people1:
if (each != None):
each = re.sub('\n', '', each)
each = re.sub('\s', '', each)
each = re.sub('\r', '', each)
each = re.sub('\|', '', each)
else:
each = ''
people.append(each)
print(people)
print('$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$$')
return people
# Flask application object; the route decorators below register their handlers on it.
app = Flask(__name__)
@app.route('/')
def hello_world():
    """Answer requests for the root URL with a fixed greeting."""
    greeting = 'hello world'
    return greeting
@app.route('/register', methods=['POST'])
def register():
    """Parse an uploaded resume and return the extracted fields as JSON.

    Expects a multipart POST with the resume in the form field ``file``.
    The raw bytes are first given to ``ResumeParsing`` for format detection.
    When no format is detected, ``.txt`` uploads are parsed by
    ``ganjiParser``; any other upload is re-read as UTF-8 text and detection
    is attempted a second time.

    Returns:
        A JSON string with ``status`` ('200' on success, '201' on every
        failure path), a ``message`` and a ``data`` object holding the parsed
        fields. Sex, education level and marital status are mapped to short
        numeric codes; work and education history are decoded into lists.
    """
    # Function-scope imports keep this handler self-contained.
    import os
    import tempfile

    # Detected type names; each one is also the name of the ResumeParsing
    # method that parses that format.
    known_types = (
        'zhiliandoc', 'zhilianhtm', 'rencaia', 'jianlika', 'fenjianli',
        'zhuopinhtm', 'zhuopin', 'pin101', 'job51', 'liepin', 'tongcheng58',
    )

    htmlfile = request.files['file']
    file_name = htmlfile.filename
    result = []
    outputjson = {
        "status": "SUCESS",
        "message": "CHAT.RESPOND",
        "data": {
            "filename": "",
            "time": "",
            "name": "",
            "sex": "",
            "age": "",
            "tel": "",
            "mail": "",
            "edu": "",
            "wed": "",
            "place": "",
            "school": "",
            "master": "",
            "work": "",
            "city": "",
            "salary": "",
            "exp": "",
            "worklist": [],
            "edulist": [],
            "company": ""
        }
    }
    print("接受请求文件名:" + file_name)
    try:
        content = htmlfile.read()
        parser = ResumeParsing(content)
        jianlitype = ''
        try:
            jianlitype = parser.DefineFiletype()
        except Exception:
            outputjson['status'] = '201'
            outputjson['message'] = '简历类型错误'
        print('********************')
        print(jianlitype)
        print('********************')

        # The upload is spooled to disk so it can be reopened as UTF-8 text.
        # A per-request temporary directory is used instead of a fixed file
        # in the working directory, so concurrent requests cannot overwrite
        # each other's input, and the scratch file is removed afterwards.
        with tempfile.TemporaryDirectory() as workdir:
            filetext = None
            try:
                if jianlitype == '':
                    if ".txt" in file_name:
                        scratch = os.path.join(workdir, "name.txt")
                        with open(scratch, 'wb') as fobj:
                            fobj.write(content)
                        filetext = open(scratch, 'r', encoding='utf-8')
                        result = ganjiParser(filetext)
                    else:
                        scratch = os.path.join(workdir, "name.htm")
                        with open(scratch, 'wb') as fobj:
                            fobj.write(content)
                        filetext = open(scratch, 'r', encoding='utf-8')
                        parser = ResumeParsing(filetext)
                        jianlitype = parser.DefineFiletype()
                if jianlitype in known_types:
                    result = getattr(parser, jianlitype)(htmlfile)
                elif ".txt" not in file_name:
                    outputjson['status'] = '201'
                    outputjson['message'] = '简历类型错误'
                    outputjson['data']['filename'] = file_name
                    print(outputjson)
                    return json.dumps(outputjson)
                print(result)
            except Exception:
                print("#############")
            finally:
                # Close the reopened text handle before the directory is
                # removed; it stays open until parsing has finished because
                # the parser may read from it lazily.
                if filetext is not None:
                    filetext.close()
        print(result)

        try:
            # A result without name, phone and e-mail is treated as unusable.
            if result == [] or (result[1] == '' and result[4] == '' and result[5] == ''):
                outputjson['status'] = '201'
                outputjson['message'] = '简历内容被损坏'
                outputjson['data']['filename'] = file_name
                return json.dumps(outputjson)

            data = outputjson['data']
            outputjson['status'] = '200'
            outputjson['message'] = '成功'
            data['filename'] = file_name
            data['time'] = result[0]
            data['name'] = result[1]
            if result[2] in ['男', 'Male']:
                data['sex'] = '0'
            elif result[2] in ['女', 'Female']:
                data['sex'] = '1'
            else:
                data['sex'] = ''
            # Keep only the number of the age text.
            result[3] = re.sub('岁', '', result[3])
            result[3] = re.sub('Years', '', result[3])
            data['age'] = result[3]
            data['tel'] = result[4]
            data['mail'] = result[5]
            # Education level code: '6' doctorate down to '1' for anything
            # not recognised.
            if result[6] in ['博士', 'Docter']:
                data['edu'] = '6'
            elif result[6] in ['硕士', 'Master']:
                data['edu'] = '5'
            elif result[6] in ['本科', 'Bachelor']:
                data['edu'] = '4'
            elif result[6] == '大专':
                data['edu'] = '3'
            elif result[6] in ['高中', '中专/技校', '中专', '中技']:
                data['edu'] = '2'
            else:
                data['edu'] = '1'
            if result[7] in ['已婚', 'Married']:
                data['wed'] = '1'
            elif result[7] in ['未婚', '离异', 'Single']:
                data['wed'] = '0'
            else:
                data['wed'] = '2'
            data['place'] = result[8]
            data['school'] = result[9]
            data['master'] = result[10]
            data['work'] = result[11]
            data['city'] = result[12]
            data['salary'] = result[13]
            # Reduce the experience text to the leading number of years.
            result[14] = re.sub('无工作经验', '0', result[14])
            result[14] = re.sub('Years', '', result[14])
            data['exp'] = result[14].split("年")[0]
            data['company'] = result[15]
            try:
                data['worklist'] = json.loads(result[16])
                data['edulist'] = json.loads(result[17])
            except Exception:
                # Best effort: the scalar fields are still returned when the
                # history text is not valid JSON.
                print("工作经历、教育经历错误")
            return json.dumps(outputjson)
        except Exception:
            outputjson['status'] = '201'
            outputjson['message'] = '解析错误'
            outputjson['data']['filename'] = file_name
            return json.dumps(outputjson)
    except Exception:
        outputjson['status'] = '201'
        outputjson['message'] = '参数错误'
        outputjson['data']['filename'] = file_name
        return json.dumps(outputjson)
if __name__ == '__main__':
    # Run as a script: serve on all network interfaces, port 8087.
    app.run(host='0.0.0.0', port=8087)
Loading...
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化