# (Web page banner captured with this file: "Code pull complete; the page will refresh automatically.")
# -*- coding: utf-8 -*-
import os
from itertools import combinations
from typing import Any, Text, Dict
from rasa.nlu.extractors.extractor import EntityExtractor
class MatchEntityExtractor(EntityExtractor):
    """Extract entities by exact (literal) dictionary matching.

    Every ``*.txt`` file found directly inside ``dictionary_path`` defines one
    entity type: the file name without its ``.txt`` suffix is the entity
    name, and every non-blank line of the file is a term to look for in the
    message text.

    Configuration keys (see ``defaults``):
        dictionary_path: Directory holding the ``*.txt`` dictionaries. When it
            is not set, no dictionaries are loaded and nothing is extracted.
        take_long: When truthy, if one matched value is a substring of
            another matched value, only the longer one is kept.
        take_short: When truthy, if one matched value is a substring of
            another matched value, only the shorter one is kept.

    ``take_long`` and ``take_short`` are mutually exclusive.

    NOTE(review): only the first occurrence of each term in the text is
    reported, and nested matches are resolved by comparing the matched
    *values* (substring test), not their character spans. Both are kept
    exactly as in the previous implementation.
    """

    provides = ["entities"]

    defaults = {
        "dictionary_path": None,
        "take_long": None,
        "take_short": None
    }

    def __init__(self, component_config=None):
        """Read the configuration and load all dictionaries into memory.

        Args:
            component_config: Component configuration mapping, forwarded to
                the base class constructor.

        Raises:
            ValueError: If both ``take_long`` and ``take_short`` are truthy.
        """
        print("init")
        super().__init__(component_config)
        self.dictionary_path = self.component_config.get("dictionary_path")
        self.take_long = self.component_config.get("take_long")
        self.take_short = self.component_config.get("take_short")
        if self.take_long and self.take_short:
            raise ValueError("take_long and take_short can not be both True")
        # Maps entity name -> list of literal terms used for exact matching.
        self.data = self._load_dictionaries(self.dictionary_path)

    @staticmethod
    def _load_dictionaries(dictionary_path):
        """Return ``{entity_name: [term, ...]}`` read from ``dictionary_path``."""
        data = {}
        if dictionary_path is None:
            # os.listdir(None) would silently scan the current working
            # directory and os.path.join(None, ...) would then raise
            # TypeError; treat "not configured" as "no dictionaries".
            return data
        for file_name in os.listdir(dictionary_path):
            if not file_name.endswith(".txt"):
                continue
            file_path = os.path.join(dictionary_path, file_name)
            # "utf-8-sig" reads plain UTF-8 as well, but drops a leading BOM
            # that would otherwise be glued to the first term of the file.
            with open(file_path, mode="r", encoding="utf-8-sig") as f:
                lines = f.read().splitlines()
            # Blank lines must be skipped: str.find("") is always 0, so an
            # empty term would "match" every message with a zero-length span.
            data[file_name[:-4]] = [line for line in lines if line.strip()]
        return data

    def process(self, message, **kwargs):
        """Find dictionary terms in ``message.text`` and store them as entities.

        Each match is recorded with its ``start``/``end`` character offsets,
        the matched ``value``, the ``entity`` name and a ``confidence`` of 1.
        The matches are appended to whatever is already stored under
        ``"entities"`` on the message, so entities set earlier are preserved.

        Args:
            message: The message being processed; its ``text`` attribute is
                searched.
            **kwargs: Unused.
        """
        print("process")
        # A message without text simply yields no matches.
        text = message.text or ""
        entities = []
        for entity, terms in self.data.items():
            for term in terms:
                if not term:
                    # Guard against empty terms placed in self.data directly.
                    continue
                start = text.find(term)
                if start != -1:
                    entities.append({
                        "start": start,
                        "end": start + len(term),
                        "value": term,
                        "entity": entity,
                        "confidence": 1
                    })
        if self.take_long or self.take_short:
            self._drop_nested(entities)
        extracted = self.add_extractor_name(entities)
        # Append instead of overwrite so previously stored entities survive.
        message.set(
            "entities",
            message.get("entities", []) + extracted,
            add_to_output=True,
        )

    def _drop_nested(self, entities):
        """Remove, in place, one entity of every pair whose values nest."""
        # combinations() snapshots its input up front, so removing items from
        # ``entities`` while looping over the pairs is safe.
        for first, second in combinations(entities, 2):
            v0, v1 = first["value"], second["value"]
            if v0 in v1 or v1 in v0:
                if len(v0) > len(v1):
                    longer, shorter = first, second
                else:
                    longer, shorter = second, first
                # Truthiness (not ``== True``) keeps this consistent with the
                # validation performed in __init__.
                if self.take_long and shorter in entities:
                    entities.remove(shorter)
                if self.take_short and longer in entities:
                    entities.remove(longer)

    @classmethod
    def load(cls, meta: Dict[Text, Any], model_dir=None, model_metadata=None, cached_component=None, **kwargs):
        """Recreate the component from its stored configuration.

        Nothing is read from ``model_dir``; the dictionaries are loaded again
        from the ``dictionary_path`` found in ``meta``.
        """
        print("load")
        return cls(meta)
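# (Hosting-site notice captured with this file: "This section may contain content unsuitable for display, so the page does not show it. You can review and modify it through the relevant editing functions.")
# (Hosting-site notice, continued: "If you confirm the content involves no inappropriate language / pure advertising / violence / vulgar or pornographic material / infringement / piracy / false or worthless content, and nothing that violates applicable laws and regulations, you can click submit to appeal and we will handle it as soon as possible.")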