# FastAPI service wrapping ZhipuAI ChatGLM3-6B-32k; exposes /gpu/chat and /gpu/his/clear.
from fastapi import FastAPI, Request
import uvicorn, json, datetime
from modelscope import AutoTokenizer, AutoModel, snapshot_download
# Fetch the pinned snapshot of ChatGLM3-6B-32k and load tokenizer + model.
model_dir = snapshot_download("ZhipuAI/chatglm3-6b-32k", revision="v1.0.0")
tokenizer = AutoTokenizer.from_pretrained(model_dir, trust_remote_code=True)
# fp16 weights on the GPU, switched to inference mode (disables dropout etc.).
model = (
    AutoModel.from_pretrained(model_dir, trust_remote_code=True)
    .half()
    .cuda()
    .eval()
)
def getBizPrompt(fn):
    """Return the full contents of the UTF-8 text file at path *fn*."""
    with open(fn, 'r', encoding="utf-8") as fh:
        return fh.read()
# Global multi-turn transcript shared (and mutated) by the endpoints below;
# the server runs a single worker, so one process owns this state.
history=[]
app = FastAPI()
@app.post("/gpu/his/clear")
async def gpuHisClear(request: Request):
    """Reset the global conversation history; returns the now-empty list.

    Only `history` is reassigned here, so the original
    `global model, tokenizer` declaration was unnecessary and is removed.
    """
    global history
    history = []
    return history
@app.post("/gpu/chat")
async def gpuChat(request: Request):
    """Chat endpoint backed by the module-level ChatGLM3 model.

    Expected JSON body keys:
        content      -- the user's message (passed to model.chat).
        withHistory  -- truthy to continue/seed the multi-turn session.
        BizPrompt    -- path of a UTF-8 prompt file; read only on the first
                        turn of a history session to seed the context.
        responseType -- "history" to return the whole transcript, anything
                        else to return just the model's reply.
    """
    global history
    # request.json() already returns the parsed body; the original code's
    # json.dumps()/json.loads() round-trip was a no-op and is removed.
    payload = await request.json()
    content = payload.get('content')
    withHistory = payload.get('withHistory')
    responseType = payload.get('responseType')
    # First turn of a history session: seed with the business-context prompt.
    if withHistory and len(history) == 0:
        u1 = {"role": "user", "content": "下面是你和我对话的背景信息:\n\n\n\n{}".format(getBizPrompt(payload.get('BizPrompt')))}
        x1 = {"role": "assistant", "metadata": "", "content": "好的,请问您有什么问题吗?"}
        history.append(u1)
        history.append(x1)
    # NOTE(review): even when withHistory is false, the global history is
    # overwritten with this single-turn exchange (behavior preserved from the
    # original) — confirm whether stateless calls should really clobber it.
    response, history = model.chat(tokenizer, content, history=history if withHistory else [])
    if "history" == responseType:
        return history
    else:
        return response
if __name__ == '__main__':
    # Bind on all interfaces; a single worker is required so every request
    # shares the one in-process model and `history` globals.
    uvicorn.run(app, host='0.0.0.0', port=9000, workers=1)
# (removed: hosting-site content-moderation boilerplate accidentally captured with the source)