import streamlit as st
#from langchain.chat_models import ChatOpenAI
from langchain.document_loaders import WebBaseLoader
from langchain.chains.summarize import load_summarize_chain
from langchain.text_splitter import RecursiveCharacterTextSplitter
from bs4 import BeautifulSoup  # possibly needed by WebBaseLoader?
from langchain import HuggingFaceHub
import requests
import sys
from huggingface_hub import InferenceClient
import os
from dotenv import load_dotenv

load_dotenv()
hf_token = os.environ.get('HUGGINGFACEHUB_API_TOKEN')
repo_id = os.environ.get('repo_id')
#port = os.getenv('port')
#OPENAI_API_KEY = os.environ.get('OPENAI_API_KEY')

llm = HuggingFaceHub(repo_id=repo_id,  # for StarChat
                     huggingfacehub_api_token=hf_token,  # the parameter name huggingfacehub_api_token seems to be fine!
                     model_kwargs={"min_length": 512,  # for StarChat
                                   "max_new_tokens": 1024,
                                   "do_sample": True,  # for StarChat
                                   "temperature": 0.01,
                                   "top_k": 50,
                                   "top_p": 0.95,
                                   "eos_token_id": 49155})

#chain = load_summarize_chain(llm, chain_type="stuff")  # "stuff" mode errors easily: probably exceeds the LLM's token limit
chain = load_summarize_chain(llm, chain_type="refine")

print("Defining the function that strips the redundant 'Context:' text")

def remove_context(text):
    # Check whether 'Context:' is present
    if 'Context:' in text:
        # Find the position of the first '\n\n'
        end_of_context = text.find('\n\n')
        if end_of_context != -1:
            # Remove everything from 'Context:' up to the first '\n\n'
            return text[end_of_context + 2:]  # '+2' skips the two newline characters
    # If 'Context:' is absent (or no blank line was found), return the original text
    return text

print("Finished defining the 'Context:'-stripping function")

#text_splitter_rcs = RecursiveCharacterTextSplitter(
#    #separator = "\n",  # TypeError: TextSplitter.__init__() got an unexpected keyword argument 'separator'
#    chunk_size = 500,
#    chunk_overlap = 100,  # striding over the text
#    length_function = len,
#    )

#llm = ChatOpenAI(temperature=0, model_name="gpt-3.5-turbo-16k")

url = st.text_input("Enter website URL to summarize (format: https://www.usinoip.com):")

if url.strip():  # a single strip() check covers all the empty/whitespace-only cases
    try:
        #loader = WebBaseLoader("https://www.usinoip.com/")
        with st.spinner("AI Thinking...Please wait a while to Cheers!"):
            print("Website to Chat: " + url)
            loader = WebBaseLoader(url)
            docs = loader.load()
            print("Webpage contents loaded")
            #split_docs = text_splitter_rcs.split_documents(docs)
            #print(split_docs)
            result = chain.run(docs)  # the result's type is unusual: it prints fine on its own, but concatenating it with another string for print errors!
            #result = chain.run(split_docs)
            # Root cause of the earlier recurring POST errors: chain.run(docs) does not return a str, which broke the program
            print("Chain run results:")
            print(result)
            result = str(result)
            print("Chain run results in str format:")
            print(result)
            cleaned_initial_ai_response = remove_context(result)
            print("AI response result cleaned initially: " + cleaned_initial_ai_response)
            # Keep only the text before the first '<|end|>', then drop the remaining StarChat role markers
            final_ai_response = (cleaned_initial_ai_response.split('<|end|>')[0]
                                 .strip()
                                 .replace('\n\n', '\n')
                                 .replace('<|user|>', '')
                                 .replace('<|system|>', '')
                                 .replace('<|assistant|>', ''))
            new_final_ai_response = final_ai_response.split('Unhelpful Answer:')[0].strip()
            final_result = new_final_ai_response.split('Note:')[0].strip()
            #print("AI Summarization: " + result)  # this errors; see the reason above
            print("AI Summarization:")
            print(final_result)
            st.write("AI Summarization:")
            st.write(final_result)
    except Exception as e:
        st.write("Wrong URL or URL not parsable.")
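
# ---------------------------------------------------------------------------
# Usage sketch (assumptions, not part of the original app): the script expects
# a .env file alongside it. The values below are illustrative only; repo_id
# should point at a StarChat-style model, since eos_token_id=49155 and the
# '<|end|>'/'<|user|>'/'<|assistant|>' markers cleaned above are StarChat
# conventions. The token shown is a placeholder, not a real credential, and
# app.py is an assumed filename for this script.
#
#   HUGGINGFACEHUB_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxx
#   repo_id=HuggingFaceH4/starchat-beta
#
# Then launch the app with Streamlit:
#
#   streamlit run app.py
# ---------------------------------------------------------------------------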