Fetch the repository succeeded.
This action will force synchronization from 狮子的魂/jcseg, which will overwrite any changes that you have made since you forked the repository, and can not be recovered!!!
Synchronous operation will process in the background and will refresh the page when finishing processing. Please be patient.
# jcseg server configuration file with standard json syntax
# question or report to chenxin<chenxin619315@gmail.com>
{
# jcseg server configuration
"server_config": {
# server port
"port": 1990,
# default conmunication charset
"charset": "utf-8",
# http idle timeout in ms
"http_connection_idle_timeout": 60000,
# jetty maximum thread pool size
"max_thread_pool_size": 200,
# thread idle timeout in ms
"thread_idle_timeout": 30000,
# http output buffer size
"http_output_buffer_size": 32768,
# request header size
"http_request_header_size": 8192,
# response header size
"http_response_header_size": 8192
},
# global setting for jcseg, yet another copy of the old
# configuration file jcseg.properties
"jcseg_global_config": {
# maximum match length. (5-7)
"jcseg_maxlen": 7,
# recognized the chinese name.
# (true to open and false to close it)
"jcseg_icnname": true,
# maximum length for pair punctuation text.
# set it to 0 to close this function
"jcseg_pptmaxlen": 7,
# maximum length for chinese last name andron.
"jcseg_cnmaxlnadron": 1,
# whether to clear the stopwords.
# (set true to clear stopwords and false to close it)
"jcseg_clearstopword": false,
# whether to convert the chinese numeric to arabic number.
# (set to true open it and false to close it) like '\u4E09\u4E07' to 30000.
"jcseg_cnnumtoarabic": true,
# whether to convert the chinese fraction to arabic fraction.
# @Note: for lucene,solr,elasticsearch eg.. close it.
"jcseg_cnfratoarabic": false,
# whether to keep the unrecognized word.
# (set true to keep unrecognized word and false to clear it)
"jcseg_keepunregword": true,
# whether to start the secondary segmentation for the complex english words.
"jcseg_ensencondseg": true,
# min length of the secondary simple token.
# (better larger than 1)
"jcseg_stokenminlen": 2,
#thrshold for chinese name recognize.
# better not change it before you know what you are doing.
"jcseg_nsthreshold": 1000000,
#The punctuations that will be keep in an token.
# (Not the end of the token).
"jcseg_keeppunctuations": "@#%.&+"
},
# dictionary instance setting.
# add yours here with standard json syntax
"jcseg_dict": {
"master": {
# set to null to load the lexicons from the classpath
path: null,
#"path": [
# "{jar.dir}/lexicon"
# # absolute path here
# # "/java/JavaSE/jcseg/lexicon"
#],
# wether to load the part of speech of the words
"loadpos": true,
# whether to load the pinyin of the words.
"loadpinyin": true,
# whether to load the synoyms words of the words.
"loadsyn": true,
# whether to load the entity of the words.
"loadentity": true,
# whether to load the modified lexicon file auto.
"autoload": true,
# Poll time for auto load. (in seconds)
"polltime": 300
}
# add more of yours here
# ,"name" : {
# "path": [
# "absolute jcseg standard lexicon path 1",
# "absolute jcseg standard lexicon path 2"
# ...
# ],
# "autoload": 0,
# "polltime": 300
# }
},
# JcsegTaskConfig instance setting.
# @Note:
# All the config instance here is extends from the global_setting above.
# do nothing will extends all the setting from global_setting
"jcseg_config": {
"master": {
# extends and Override the global setting
"jcseg_pptmaxlen": 0,
"jcseg_cnfratoarabic": true,
"jcseg_keepunregword": false
}
# this one is for keywords,keyphrase,sentence,summary extract
# @Note: do not delete this instance if u want jcseg to
# offset u extractor service
,"extractor": {
"jcseg_pptmaxlen": 0,
"jcseg_clearstopword": true,
"jcseg_cnnumtoarabic": false,
"jcseg_cnfratoarabic": false,
"jcseg_keepunregword": false,
"jcseg_ensencondseg": false
}
# well, this one is for NLP only
,"nlp" : {
"jcseg_ensencondseg": false,
"jcseg_cnfratoarabic": true,
"jcseg_cnnumtoarabic": true
}
# add more of yours here
# ,"name": {
# ...
# }
},
# jcseg tokenizer instance setting.
# Your could let the instance service for you by access:
# http://jcseg_server_host:port/tokenizer/instance_name
# instance_name is the name of instance you define here.
"jcseg_tokenizer": {
"master": {
# jcseg tokenizer algorithm, could be:
# 1: SIMPLE_MODE
# 2: COMPLEX_MODE
# 3: DETECT_MODE
# 4: SEARCH_MODE
# 5: DELIMITER_MODE
# 6: NLP_MODE
# see org.lionsoul.jcseg.tokenizer.core.JcsegTaskConfig for more info
"algorithm": 2,
# dictionary instance name
# choose one of your defines above in the dict scope
"dict": "master",
# JcsegTaskConfig instance name
# choose one of your defines above in the config scope
"config": "master"
}
# this tokenizer instance is for extractor service
# do not delete it if you want jcseg to offset you extractor service
,"extractor": {
"algorithm": 2,
"dict": "master",
"config": "extractor"
}
# this tokenizer instance of for NLP analysis
# keep it for you NLP project
,"nlp" : {
"algorithm": 6,
"dict": "master",
"config": "nlp"
}
# add more of your here
# ,"name": {
# ...
# }
}
}
此处可能存在不合适展示的内容,页面不予展示。您可通过相关编辑功能自查并修改。
如您确认内容无涉及 不当用语 / 纯广告导流 / 暴力 / 低俗色情 / 侵权 / 盗版 / 虚假 / 无价值内容或违法国家有关法律法规的内容,可点击提交进行申诉,我们将尽快为您处理。