import os

import gradio as gr
import yaml
from langchain.chains.llm import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
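
# Location of the YAML file holding the model-cache and Chroma paths.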
CONFIG_PATH = os.path.join('config', 'default_config.yaml')


def main():
title = "Ask a Johns Hopkins Librarian!" | |
description = """ | |
This chat bot is an expert on the <a href=https://jscholarship.library.jhu.edu/handle/1774.2/35703>Edward St. John | |
Real Estate Program - Practicum Projects</a> collection. It will answer any question regarding these papers! | |
<img src="https://jscholarship.library.jhu.edu/assets/j10p/images/libraries.logo.small.horizontal.white.png" | |
width=200px> | |
""" | |
    article = """
    This Retrieval Augmented Generation (RAG) chat bot is designed to answer questions only on the
    <a href="https://jscholarship.library.jhu.edu/handle/1774.2/35703">Edward St. John Real Estate Program - Practicum Projects</a> collection.
    """
    gr.Interface(fn=predict,
                 inputs="text",
                 title=title,
                 description=description,
                 article=article,
                 examples=[
                     ["What plans did Baltimore have to transition from an industrial city to tourism?"],
                     ["What type of market analysis and feasibility studies were performed for the 9 St. Marys Street project in Annapolis Maryland?"],
                     ["What are the feasibility studies of moving Johns Hopkins Medicine admin departments back to the East Baltimore campus?"]
                 ],
                 outputs="text").launch(share=True)
def predict(prompt):
    config = load_config()
    prompt_template = """
    Answer the question based only on the following context:
    {context}
    ---
    Answer the question based on the above context: {question}
    """
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path']
    )
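    # Open the persisted Chroma collection of practicum-paper chunks.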
    db = Chroma(persist_directory=config['chroma_db']['chroma_path'],
                embedding_function=hf_embed_func,
                collection_name="jscholar_rag")
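    # Pull the 7 most relevant chunks and join them into one context block.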
    results = db.similarity_search_with_relevance_scores(prompt, k=7)
    context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
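    # Zephyr-7B via the HF Inference API; the endpoint typically picks up the
    # API token from the HUGGINGFACEHUB_API_TOKEN environment variable. Low
    # temperature keeps answers close to the retrieved context.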
    llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
        max_new_tokens=512,
    )
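    # input_variables must name the template placeholders ("context" and
    # "question"), not hold their runtime values.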
    qa_prompt = PromptTemplate(
        input_variables=["context", "question"], template=prompt_template
    )
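    # invoke() fills the template and returns a dict; the completion is under
    # the 'text' key.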
    chat_model = LLMChain(llm=llm, prompt=qa_prompt)
    response = chat_model.invoke({'question': prompt, 'context': context_text})
    return response.get('text')
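

# Load settings from the YAML config. Based on how the values are used above,
# the file is expected to look roughly like this (paths are illustrative):
#
#   models:
#     model_cache_path: /path/to/model_cache
#   chroma_db:
#     chroma_path: /path/to/chroma_db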
def load_config():
    with open(CONFIG_PATH, 'r') as file:
        return yaml.safe_load(file)


if __name__ == "__main__":
    main()