import streamlit as st |
from langchain.document_loaders import WebBaseLoader |
from langchain.chains.summarize import load_summarize_chain |
from langchain.text_splitter import RecursiveCharacterTextSplitter |
from bs4 import BeautifulSoup |
from langchain import HuggingFaceHub |
import requests |
import sys |
from huggingface_hub import InferenceClient |
import os |
from dotenv import load_dotenv |
# Module-level setup: read configuration from the environment, then build the
# LLM client and the summarization chain used by the rest of the script.
load_dotenv()
hf_token = os.getenv('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.getenv('repo_id')

# Generation parameters handed to HuggingFaceHub as ``model_kwargs``.
# NOTE(review): eos_token_id 49155 is presumably specific to the model named
# by ``repo_id`` -- confirm whenever the model is changed.
_model_kwargs = {
    "min_length": 512,
    "max_new_tokens": 1024,
    "do_sample": True,
    "temperature": 0.01,
    "top_k": 50,
    "top_p": 0.95,
    "eos_token_id": 49155,
}

llm = HuggingFaceHub(
    repo_id=repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs=_model_kwargs,
)

# Summarization chain built with chain_type="refine".
chain = load_summarize_chain(llm, chain_type="refine")

# Console trace (Chinese): "defining the function that handles redundant
# Context text".
print(f"定义处理多余的Context文本的函数")
def remove_context(text):
    """Drop the leading section of *text* when it contains ``Context:``.

    If the marker ``Context:`` occurs anywhere in *text*, everything up to
    and including the first blank line (two consecutive newlines) is
    discarded and the remainder is returned.

    Args:
        text: The raw response string to clean.

    Returns:
        The text after the first blank line when ``Context:`` is present and
        a blank line exists; otherwise *text* unchanged.
    """
    if 'Context:' not in text:
        return text
    _, separator, remainder = text.partition('\n\n')
    # Without a blank line there is no boundary to cut at. The previous
    # find()-based slice got -1 here and silently dropped the first
    # character of the text; return the input untouched instead.
    return remainder if separator else text
# Console trace (Chinese): "finished defining the function that handles
# redundant Context text".
print(f"处理多余的Context文本函数定义结束")

# Main page flow: ask for a URL, summarize the page behind it, show the result.
url = st.text_input("Enter webiste URL to summarize (format: https://www.usinoip.com):")

# A single strip() covers the empty and whitespace-only cases that the
# previous five-clause condition tested redundantly, and keeps stray
# surrounding whitespace out of the request.
target_url = url.strip()
if target_url:
    try:
        with st.spinner("AI Thinking...Please wait a while to Cheers!"):
            print("Website to Chat: " + target_url)
            loader = WebBaseLoader(target_url)
            docs = loader.load()
            print("Webpage contents loaded")

            result = chain.run(docs)
            print("Chain run results:")
            print(result)
            result = str(result)
            print("Chain run results in str format:")
            print(result)

            cleaned_initial_ai_response = remove_context(result)
            print("Ai Resposne result cleaned initially: " + cleaned_initial_ai_response)

            # Keep only the text before the first end-of-turn marker, then
            # collapse blank lines. No '<|end|>' can remain after the split,
            # so only the other chat-template markers need removing.
            final_ai_response = (
                cleaned_initial_ai_response.split('<|end|>')[0]
                .strip()
                .replace('\n\n', '\n')
            )
            for marker in ('<|user|>', '<|system|>', '<|assistant|>'):
                final_ai_response = final_ai_response.replace(marker, '')

            # Cut trailing sections the model sometimes appends.
            new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
            final_result = new_final_ai_response.split('Note:')[0].strip()

            print("AI Summarization:")
            print(final_result)
            st.write("AI Summarization:")
            st.write(final_result)
    except Exception as e:
        # Top-level boundary: the page shows a generic message, but the real
        # cause (bad URL, network failure, model/API error) is logged to the
        # console instead of being silently discarded.
        print(f"Summarization failed: {e!r}")
        st.write("Wrong URL or URL not parsable.")