import os

import gradio as gr
import yaml
from langchain.chains.llm import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate

CONFIG_PATH = os.path.join('config', 'default_config.yaml')


def main():
    config = load_config()

    title = "Ask a Johns Hopkins Librarian!"
    description = """
    This chat bot is an expert on the Edward St. John Real Estate Program -
    Practicum Projects collection. It will answer any question regarding these papers!
    """
    article = """
    This Retrieval-Augmented Generation (RAG) chat bot is designed to answer questions
    only on the Edward St. John Real Estate Program - Practicum Projects collection.
    """

    # The HuggingFaceEndpoint LLM reads the API token from this environment variable.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = config['tokens']['hugging_face']

    gr.Interface(
        fn=predict,
        inputs="text",
        outputs="text",
        title=title,
        description=description,
        article=article,
        examples=[
            ["What plans did Baltimore have to transition from an industrial city to tourism?"],
            ["What type of market analysis and feasibility studies were performed for the 9 St. Marys Street project in Annapolis Maryland?"],
            ["What are the feasibility studies of moving Johns Hopkins Medicine admin departments back to the East Baltimore campus?"],
        ],
    ).launch(share=False)


def predict(prompt):
    config = load_config()

    prompt_template = """
    Answer the question based only on the following context:

    {context}

    ---

    Answer the question based on the above context: {question}
    """

    # Embed the query with the same model that was used to build the Chroma index.
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path'],
    )

    db = Chroma(
        persist_directory=config['chroma_db']['chroma_path'],
        embedding_function=hf_embed_func,
        collection_name="jscholar_rag",
    )

    # Retrieve the 7 most relevant chunks and join them into one context block.
    results = db.similarity_search_with_relevance_scores(prompt, k=7)
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
        max_new_tokens=512,
    )

    # input_variables must be the template's placeholder names,
    # not the retrieved values themselves.
    prompt_template_filled = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )

    chat_model = LLMChain(llm=llm, prompt=prompt_template_filled)
    response_text = chat_model.invoke({'question': prompt, 'context': context_text})
    return response_text.get('text')


def load_config():
    with open(CONFIG_PATH, 'r') as file:
        return yaml.safe_load(file)


if __name__ == "__main__":
    main()
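# A minimal sketch of the expected config/default_config.yaml. The keys are
# inferred from the lookups above (tokens.hugging_face, models.model_cache_path,
# chroma_db.chroma_path); the values shown are placeholder assumptions, not
# taken from the source:
#
#   tokens:
#     hugging_face: "hf_xxx"           # Hugging Face API token
#   models:
#     model_cache_path: "./models"     # cache dir for sentence-transformer weights
#   chroma_db:
#     chroma_path: "./chroma"          # directory of the persisted Chroma index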