加入 Gitee
与超过 1200万 开发者一起发现、参与优秀开源项目,私有仓库也完全免费 :)
免费加入
文件
克隆/下载
utils.py 2.33 KB
一键复制 编辑 原始数据 按行查看 历史
Turing's Cat 提交于 2020-09-29 22:49 . releasev2
import jieba
import random
def ner(ques):
    """
    Extract the name a user gives in a self-introduction sentence.

    The sentence is scanned for a fixed, ordered list of cue words. The text
    following the first cue that matches is returned as-is (it is not
    trimmed or otherwise cleaned).

    :param ques: the user's utterance; it is converted with ``str()`` before
        matching
    :return: the substring that follows the matched cue word, or ``None``
        when no cue word is found
    """
    text = str(ques)

    # A bare "叫" introduces a name unless it starts "叫我", which is handled
    # further down. The bounds check keeps a trailing "叫" from raising
    # IndexError; in that case the remaining cues are tried instead.
    pos = text.find("叫")
    if pos != -1 and pos + 1 < len(text) and text[pos + 1] != "我":
        return text[pos + 1:]

    # Order matters: earlier cues take priority over later ones.
    for cue in ("是", "姓名", "名字", "叫我", "喊我", "称呼我"):
        pos = text.find(cue)
        if pos != -1:
            # Skip past the cue itself; each cue is searched by its own text
            # so the offset always refers to the cue that actually matched.
            return text[pos + len(cue):]

    return None
def Levenshtein_Distance(str1, str2):
    """
    Compute the edit (Levenshtein) distance between str1 and str2.

    Insertions, deletions and substitutions each cost 1; matching
    characters cost 0.

    :param str1: first sequence
    :param str2: second sequence
    :return: the edit distance as an integer
    """
    rows = len(str1) + 1
    cols = len(str2) + 1

    # Seeding every cell with r + c makes the first row and first column
    # correct (distance to/from the empty prefix); every interior cell is
    # overwritten by the loop below.
    table = [[r + c for c in range(cols)] for r in range(rows)]

    for r in range(1, rows):
        for c in range(1, cols):
            substitution = 0 if str1[r - 1] == str2[c - 1] else 1
            table[r][c] = min(
                table[r - 1][c] + 1,                 # delete from str1
                table[r][c - 1] + 1,                 # insert into str1
                table[r - 1][c - 1] + substitution,  # match / substitute
            )

    return table[rows - 1][cols - 1]
def question_review(ques, sensitive):
    """
    Review a user utterance and deflect it when it contains sensitive words.

    The utterance is segmented with jieba. A token counts against the
    utterance when it is found in ``sensitive`` and is not one of the
    exempt words listed in the body. Exempt words on their own never cause
    a rejection.

    :param ques: what the user said
    :param sensitive: the sensitive-word collection
    :return: ``"approved"`` when no token counts against the utterance,
        otherwise one randomly chosen deflection phrase
    """
    exempt = ["台湾", "香港", "澳门", "西藏", "新疆", "共产党"]
    deflections = ["我们还是聊点别的吧", "听不大懂耶", "我们聊点别的吧", "听不大懂哎"]

    offending = [
        token
        for token in jieba.cut(ques)
        if token in sensitive and token not in exempt
    ]

    if offending:
        return random.choice(deflections)
    return "approved"
def load_sensitive():
    """
    Load the sensitive-word list.

    Reads the file ``sensitive/keywords`` (a relative path) as UTF-8 and
    returns one entry per line with leading and trailing whitespace removed.
    Blank lines are kept as empty strings.

    :return: list of stripped lines
    """
    with open("sensitive/keywords", "r", encoding="utf-8") as handle:
        return [entry.strip() for entry in handle]
马建仓 AI 助手
尝试更多
代码解读
代码找茬
代码优化