import streamlit as st

from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from bs4 import BeautifulSoup
from langchain import HuggingFaceHub
import requests
import sys
from huggingface_hub import InferenceClient

import os
from dotenv import load_dotenv

load_dotenv()

hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.environ.get('repo_id')
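# NOTE: both values are expected in a local .env file. HUGGINGFACEHUB_API_TOKEN is a
# Hugging Face access token, and repo_id should name a hosted text-generation model
# (an instruction-tuned chat model is assumed here; the exact repo is a deployment choice).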

llm = HuggingFaceHub(
    repo_id=repo_id,
    huggingfacehub_api_token=hf_token,
    model_kwargs={
        "min_length": 512,
        "max_new_tokens": 1024,
        "do_sample": True,
        "temperature": 0.01,
        "top_k": 50,
        "top_p": 0.95,
        "eos_token_id": 49155,
    },
)

chain = load_summarize_chain(llm, chain_type="refine")
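# The "refine" chain summarizes the first document chunk, then feeds each subsequent
# chunk together with the running summary back to the LLM to refine it iteratively.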

print("Defining helper that strips a leading 'Context:' block from the model output")

def remove_context(text):
    # If the output carries a retrieved "Context:" block, drop everything up to and
    # including the first blank line; otherwise return the text unchanged.
    if 'Context:' in text:
        end_of_context = text.find('\n\n')
        return text[end_of_context + 2:]
    else:
        return text

print("Helper for stripping the 'Context:' block defined")

url = st.text_input("Enter website URL to summarize (format: https://www.usinoip.com):")

if url.strip():
    try:
        with st.spinner("AI is thinking... please wait a moment!"):
            print("Website to Chat: " + url)

            loader = WebBaseLoader(url)
            docs = loader.load()
            print("Webpage contents loaded")
            result = chain.run(docs)
            print("Chain run results:")
            print(result)

            result = str(result)
            print("Chain run results in str format:")
            print(result)
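
            # The raw output may still contain chat-template artifacts: a leading
            # "Context:" block, special tokens such as <|end|>, <|user|>, <|system|>,
            # <|assistant|>, and trailing "Unhelpful Answer:" or "Note:" sections.
            # The steps below strip them heuristically.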
            cleaned_initial_ai_response = remove_context(result)
            print("AI response cleaned initially: " + cleaned_initial_ai_response)

            final_ai_response = (
                cleaned_initial_ai_response.split('<|end|>')[0]
                .strip()
                .replace('\n\n', '\n')
                .replace('<|end|>', '')
                .replace('<|user|>', '')
                .replace('<|system|>', '')
                .replace('<|assistant|>', '')
            )
            new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
            final_result = new_final_ai_response.split('Note:')[0].strip()

            print("AI Summarization:")
            print(final_result)

            st.write("AI Summarization:")
            st.write(final_result)

    except Exception as e:
        print("Error while summarizing: " + str(e))
        st.write("Wrong URL or the URL could not be parsed.")