main

分支 (5)

管理

管理

main

revert-243-fix-empty-txt

dev

change-config

add-license-1

wenda
/
example.config.yml


logging: False
#日志"
port: 17860
#webui 默认启动端口号"
library:
     strategy: "calc:2 rtst:2 agents:0"
     #库参数，每组参数间用空格分隔，冒号前为知识库类型，后为抽取数量。

     #知识库类型:
     #bing        cn.bing搜索，仅国内可用，目前处于服务降级状态
     #sogowx      sogo微信公众号搜索，可配合相应auto实现全文内容分析
     #fess        fess搜索引擎
     #rtst        支持实时生成的sentence_transformers
     #remote      调用远程闻达知识库，用于集群化部署
     #kg          知识图谱,暂未启用
     #特殊库：
     #mix         根据参数进行多知识库融合
     #agents      提供网络资源代理，没有知识库查找功能，所以数量为0
     #            （目前stable-diffusion的auto脚本需要使用其中功能，同时需开启stable-diffusion的api功能）

     count: 5
     #最大抽取数量（所有知识库总和）

     step: 2
     #知识库默认上下文步长
librarys:
  bing:
    count: 5
     #最大抽取数量
  bingsite:
     count: 5
     #最大抽取数量
     site: "www.12371.cn"
     #搜索网站
  fess:
     count: 1
     #最大抽取数量
     fess_host: "127.0.0.1:8080"
     #fess搜索引擎的部署地址
  remote:
    host: "http://127.0.0.1:17860/api/find"
     #远程知识库地址地址
  rtst:
     count: 3
     #最大抽取数量
   #   backend: Annoy
     size: 20
     #分块大小"
     overlap: 0
     #分块重叠长度
     model_path: "model/m3e-base"
     #向量模型存储路径
     device: cuda
     #embedding运行设备
  qdrant:
     count: 3
     #最大抽取数量
     size: 20
     #分块大小
     overlap: 0
     #分块重叠长度
     path: txt
     #知识库文本路径
     model_path: "model/m3e-base"
     #向量模型存储路径
     #qdrant_host: "http://localhost:6333"
     #qdrant服务地址
     qdrant_path: "memory/q"
     #qdrant本地目录   qdrant_path   qdrant_host  同时只能存在一个
     device: cpu
     #qdrant运行设备
     batch_size: 32
     #向量模型批处理大小
     collection: qa_collection
     #qdrant集合名称
     similarity_threshold: 0.8
     #相似度阈值
  kg:
     count: 5
     #最大抽取数量
     knowledge_path: ""
     #知识库的文件夹目录名称，若留空则为txt
     graph_host: ""
     #图数据库部署地址
     model_path: ""
     #信息抽取模型所在路径"
llm_type: rwkv
#llm模型类型:glm6b、rwkv、llama、replitcode等，详见相关文件
llm_models:
  rwkv:
     path: "model/RWKV-4-World-7B-v1-OnlyForTest_52%_trained-20230606-ctx4096-i8.pth"      #rwkv模型位置"
     strategy: "cuda fp16i8"
   #   path: "model/rwkv_ggml_q8.bin"           #rwkv模型位置"
   #   strategy: "Q8_0"       #rwkvcpp:运行方式，设置strategy诸如"Q8_0->16"即可开启，代表运行Q8_0模型在16个cpu核心上
   #记得去https://github.com/saharNooby/rwkv.cpp/releases下最新版本文件替换librwkv.so或rwkv.dll
     #rwkv模型参数"

     historymode: state
     #rwkv历史记录实现方式：state、string。注意，对于torch实现，本参数已弃用，因为已经实现自动切换。
     state_source_device: cpu
     #torch实现下，会保存每个会话的state。每个占用2M显存，本参数控制将其复制到内存以节约显存。
     #置为cuda:0，代表state来自cuda:0，且需要将其复制到内存以节约显存。
     #置为cuda:1，同理。
     #置为cpu，代表不需要复制，如果使用cpu计算，或多卡计算需这么设置。
     presence_penalty: 0.2
     count_penalty: 0.2
  glm6b:
     path: "model\\chatglm2-6b-int4"
     #glm模型位置"
     strategy: "cuda fp16"
     #cuda fp16	 所有glm模型 要直接跑在gpu上都可以使用这个参数
     #cuda fp16i8	 fp16原生模型 要自行量化为int8跑在gpu上可以使用这个参数
     #cuda fp16i4	 fp16原生模型 要自行量化为int4跑在gpu上可以使用这个参数
     #cuda:0 fp16 *14 -> cuda:1	fp16 多卡流水线并行，使用方法参考RWKV的strategy介绍。总层数28
   #   lora: "model/lora-450"
     #glm-lora模型位置
  baichuan:
     path: "model\\baichuan-7B"
     #模型位置"
   #   lora: "model/64rank"
  aquila: #https://model.baai.ac.cn/model-detail/100101
     path: "AquilaChat-7B" #注意没有model/
     #模型位置"
     strategy: "cuda fp16"
  llama:
     path: "model/stable-vicuna-13B.ggml.q4_2.bin"
     #llama模型位置
   #   strategy: "Q8_0"       #cpp:运行方式，设置strategy诸如"Q8_0"即可开启
     strategy: "cuda fp16i8"
  moss:
     path: "model/moss-moon-003-sft-plugin-int4"
     #模型位置
     strategy: ""
     #模型参数 暂时不用"
  openai:
   #   api_host: "https://gpt.lucent.blog/v1" #网友的反代
   #   api_host: "https://api.openai.com/v1" #官方地址
     api_host: "http://127.0.0.1:8000/v1" #rwkv runner

  replitcode:
     path: "model\\replit-code-v1-3b"
     #replitcode模型位置
     #说明：目前模型参数和chat模型差异较大，写死了，不能通过wenda界面配置，需要调整自行到llm_replitcode.py 文件中调整，或放开wenda界面参数
     #y = model.generate(x, max_length=100, do_sample=true, top_p=0.95, top_k=4, temperature=0.2, num_return_sequences=1, eos_token_id=tokenizer.eos_token_id)
     #模型地址：https://huggingface.co/replit/replit-code-v1-3b ，约10g
     #作用代码补全：问：def fibonacci(n):
     #答：def fibonacci(n):
     #if n == 0:
     #return 0
     #elif n == 1:
     #return 1
     #else:
     #return fibonacci(n-1) + fibonacci(n-2)
     #print(fibonacci(10))
     strategy: "cuda fp16"