File size: 4,622 Bytes
a6a2aba
a361f2d
 
 
a6a2aba
 
08feaf0
a6a2aba
 
b16b9b6
193ff55
 
a1461d3
 
a6a2aba
a1461d3
a6a2aba
c059691
a6a2aba
 
c059691
797f174
a6a2aba
 
 
 
 
4a859c9
 
a6a2aba
0474d21
 
 
 
 
 
 
 
 
 
 
 
 
a6a2aba
a1461d3
 
a361f2d
a6a2aba
0f7b3d4
 
 
 
 
 
d35ff03
a6a2aba
d35ff03
 
a1461d3
 
 
 
 
 
 
b16b9b6
 
 
 
 
797f174
0474d21
 
 
 
 
 
 
 
1178f16
 
 
 
 
7257ca0
 
 
 
0474d21
 
 
797f174
0474d21
d2d2af5
0474d21
01ae07f
0f7b3d4
b16b9b6
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
from fastapi import FastAPI, Request, HTTPException
from fastapi.responses import JSONResponse
from pydantic import BaseModel

from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
#https://python.langchain.com/docs/use_cases/summarization
from langchain import HuggingFaceHub
from huggingface_hub import InferenceClient
from bs4 import BeautifulSoup
import requests
import sys
import os
from dotenv import load_dotenv
load_dotenv()

# Credentials and model id are read from the process environment.
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.getenv('repo_id')
#port = os.getenv('port')

# Hosted LLM handle. The generation settings below were tagged
# "for StarChat" by the original author.
llm = HuggingFaceHub(
    repo_id=repo_id,
    # The keyword name `huggingfacehub_api_token` appears to be correct.
    huggingfacehub_api_token=hf_token,
    model_kwargs=dict(
        min_length=512,
        max_new_tokens=1024,
        do_sample=True,
        temperature=0.01,
        top_k=50,
        top_p=0.95,
        eos_token_id=49155,
    ),
)

# Summarization chain shared by all requests; the "stuff" variant is kept
# below for reference.
#chain = load_summarize_chain(llm, chain_type="stuff")
chain = load_summarize_chain(llm, chain_type="refine")

# Startup trace: announces that the Context-stripping helper is being defined.
print(f"定义处理多余的Context文本的函数")
def remove_context(text: str) -> str:
    """Drop the leading section of *text* when it contains a ``Context:`` marker.

    If ``'Context:'`` occurs anywhere in *text*, everything up to and
    including the first blank line (``'\\n\\n'``) is removed and the
    remainder is returned.

    Args:
        text: Raw text to clean.

    Returns:
        The text after the first ``'\\n\\n'`` when the marker is present and
        a blank line exists; otherwise *text* unchanged.
    """
    if 'Context:' not in text:
        # No marker: nothing to strip.
        return text

    end_of_context = text.find('\n\n')
    if end_of_context == -1:
        # No blank line, so the end of the context section is unknown.
        # Slicing with find()'s -1 would silently drop the first character,
        # so return the text untouched instead.
        return text

    # '+ 2' skips the two newline characters themselves.
    return text[end_of_context + 2:]
# Startup trace: the helper definition is complete.
print(f"处理多余的Context文本函数定义结束")

# Application object; the route handlers in this file register themselves
# on it through the `@app.post(...)` decorator.
app = FastAPI()
# Request model for the `/api/chat` handler, which declares its single
# parameter with this type.
class ChatRequest(BaseModel):
    target_url: str  # URL of the web page to load and summarize
# Original author's open question: it seems this class definition is required.

#@app.post('/')
#async def home_api(request: Request):
#    data = await request.json()
#    target_url = data['target_url']
#    print(target_url)
#    return {"Message": "FastAPI Home API Deploy Success on HF"}
    
def _cut_followup_questions(text):
    """Truncate *text* at the first '¿Cuál es' / '¿Cuáles' marker, if any."""
    # Presumably these markers start model-generated follow-up questions
    # that should not be part of the summary; confirm against real outputs.
    for marker in ('¿Cuál es', '¿Cuáles'):
        text = text.partition(marker)[0].strip()
    return text


def _strip_chat_artifacts(text):
    """Remove chat-template tokens and trailing 'Unhelpful Answer:'/'Note:' parts."""
    # Keep only what precedes the first end-of-turn token, then collapse
    # blank lines. No '<|end|>' can remain after this split, so it needs no
    # separate removal.
    text = text.split('<|end|>')[0].strip().replace('\n\n', '\n')
    for token in ('<|user|>', '<|system|>', '<|assistant|>'):
        text = text.replace(token, '')
    for marker in ('Unhelpful Answer:', 'Note:'):
        text = text.split(marker)[0].strip()
    return text


@app.post('/api/chat')
async def chat(chat_request: ChatRequest):
    """Summarize the web page at ``chat_request.target_url``.

    Loads the page, runs the summarization chain over it, cleans the model
    output and returns it as ``{"response": <summary>}``.

    Raises:
        HTTPException: status 400 when loading the page or running the
            summarization chain fails.
    """
    target_url = chat_request.target_url
    print(target_url)

    # NOTE(review): loader.load() and chain.run() are called synchronously
    # inside an async handler. Consider offloading them to a worker thread
    # once it is confirmed that the shared `chain` may be used concurrently.
    try:
        loader = WebBaseLoader(target_url)
        docs = loader.load()
        result = chain.run(docs)
    except Exception as exc:
        # Deliberate catch-all at the request boundary. The client-facing
        # message is generic, so log the real cause and chain it for
        # debugging.
        err_msg = "Wrong URL or URL not parsable."
        print(f"{err_msg} ({type(exc).__name__}: {exc})")
        raise HTTPException(status_code=400, detail=err_msg) from exc

    print(result)
    print()

    # The chain result must be coerced to str: a non-str result previously
    # broke the JSON response and surfaced as a POST error.
    result = str(result)
    cleaned_initial_ai_response = remove_context(result)
    print(cleaned_initial_ai_response)
    print()

    final_ai_response = _cut_followup_questions(cleaned_initial_ai_response)
    print("final_ai_response after cleaning/splitting/partitioning '¿cuál e'")
    print(final_ai_response)
    print()

    final_result = _strip_chat_artifacts(final_ai_response)
    print("AI Summarization: " + final_result)

    # Returning a plain dict ({'response': ...}) works as well, as long as
    # the value is a string. JSONResponse plays the role that jsonify()
    # plays in Flask.
    return JSONResponse({'response': final_result})