import os

import gradio as gr
import yaml
from langchain.chains.llm import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate

CONFIG_PATH = os.path.join('config', 'default_config.yaml')


def main():
    config = load_config()

    title = "Ask a Johns Hopkins Librarian!"
    description = """
    This chat bot is an expert on the Edward St. John Real Estate Program -
    Practicum Projects collection. It will answer any question regarding these papers!
    """
    article = """
    This Retrieval-Augmented Generation (RAG) chat bot is designed to answer questions
    only on the Edward St. John Real Estate Program - Practicum Projects collection.
    """

    # The HuggingFaceEndpoint LLM reads the API token from this environment variable.
    os.environ["HUGGINGFACEHUB_API_TOKEN"] = config['tokens']['hugging_face']

    gr.Interface(
        fn=predict,
        inputs="text",
        outputs="text",
        title=title,
        description=description,
        article=article,
        examples=[
            ["What plans did Baltimore have to transition from an industrial city to tourism?"],
            ["What type of market analysis and feasibility studies were performed for the 9 St. Marys Street project in Annapolis Maryland?"],
            ["What are the feasibility studies of moving Johns Hopkins Medicine admin departments back to the East Baltimore campus?"],
        ],
    ).launch(share=False)


def predict(prompt):
    config = load_config()

    prompt_template = """
    Answer the question based only on the following context:

    {context}

    ---

    Answer the question based on the above context: {question}
    """

    # Embed the query with the same model that was used to build the Chroma index.
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path'],
    )

    db = Chroma(
        persist_directory=config['chroma_db']['chroma_path'],
        embedding_function=hf_embed_func,
        collection_name="jscholar_rag",
    )

    # Retrieve the 7 most relevant chunks and join them into one context block.
    results = db.similarity_search_with_relevance_scores(prompt, k=7)
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])

    llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
        max_new_tokens=512,
    )

    # input_variables must be the template's placeholder names,
    # not the retrieved values themselves.
    prompt_template_filled = PromptTemplate(
        input_variables=["context", "question"],
        template=prompt_template,
    )

    chat_model = LLMChain(llm=llm, prompt=prompt_template_filled)
    response_text = chat_model.invoke({'question': prompt, 'context': context_text})
    return response_text.get('text')


def load_config():
    with open(CONFIG_PATH, 'r') as file:
        return yaml.safe_load(file)


if __name__ == "__main__":
    main()
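# A minimal sketch of the expected config/default_config.yaml. The keys are
# inferred from the lookups above (tokens.hugging_face, models.model_cache_path,
# chroma_db.chroma_path); the values shown are placeholder assumptions, not
# taken from the source:
#
#   tokens:
#     hugging_face: "hf_xxx"           # Hugging Face API token
#   models:
#     model_cache_path: "./models"     # cache dir for sentence-transformer weights
#   chroma_db:
#     chroma_path: "./chroma"          # directory of the persisted Chroma index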