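"""Gradio chat app that answers questions about the Edward St. John Real Estate
Program - Practicum Projects collection using retrieval-augmented generation."""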
import os
import gradio as gr
import yaml
from langchain.chains.llm import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate

CONFIG_PATH = os.path.join('config', 'default_config.yaml')


def main():
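    """Configure the app and launch the Gradio interface."""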
    config = load_config()
    title = "Ask a Johns Hopkins Librarian!"
    description = """

    This chat bot is an expert on the <a href=https://jscholarship.library.jhu.edu/handle/1774.2/35703>Edward St. John

     Real Estate Program - Practicum Projects</a> collection. It will answer any question regarding these papers! 

    <img src="https://jscholarship.library.jhu.edu/assets/j10p/images/libraries.logo.small.horizontal.white.png" 

    width=200px>

    """
    article = """

    This Retrieval Augmented Retrieval (RAG) chat bot is designed to answer questions only on the 

    <a href="https://jscholarship.library.jhu.edu/handle/1774.2/35703">Edward St. John Real Estate Program - Practicum Projects</a> collection 

    """

    # Expose the Hugging Face token so HuggingFaceEndpoint can authenticate.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = config['tokens']['hugging_face']
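
    # Text-in/text-out Gradio UI with a few canned example questions.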
    gr.Interface(fn=predict,
                 inputs="text",
                 title=title,
                 description=description,
                 article=article,
                 examples=[
                     ["What plans did Baltimore have to transition from an industrial city to tourism?"],
                     ["What type of market analysis and feasibility studies were performed for the 9 St. Marys Street project in Annapolis Maryland?"],
                     ["What are the feasibility studies of moving Johns Hopkins Medicine admin departments back to the East Baltimore campus?"]
                 ],
                 outputs="text").launch(share=False)


def predict(prompt):
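    """Answer a question by retrieving relevant practicum-paper chunks and
    passing them as context to the LLM."""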
    config = load_config()

    prompt_template = """

    Answer the question based only on the following context:



    {context}



    ---



    Answer the question based on the above context: {question}

    """

    # Query-time embeddings; these must match the model used to build the index.
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path']
    )
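    # Open the persisted Chroma collection of practicum-paper chunks.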
    db = Chroma(persist_directory=config['chroma_db']['chroma_path'],
                embedding_function=hf_embed_func,
                collection_name="jscholar_rag")

    # Retrieve the 7 chunks most relevant to the question, with relevance scores.
    results = db.similarity_search_with_relevance_scores(prompt, k=7)

    # Join the retrieved chunks into a single context block for the prompt.
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    # Generate with zephyr-7b-beta via the Hugging Face Inference API.
    llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
        max_new_tokens=512,
    )
    # input_variables must be the template's placeholder names, not their values.
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"], template=prompt_template
    )
    chat_model = LLMChain(llm=llm, prompt=qa_prompt)
    response_text = chat_model.invoke({'question': prompt, 'context': context_text})
    return response_text.get('text')


def load_config():
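    """Read and parse the YAML config file."""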
    with open(CONFIG_PATH, 'r') as file:
        loaded_data = yaml.safe_load(file)

    return loaded_data


if __name__ == "__main__":
    main()