import os
import gradio as gr
import yaml
from langchain.chains.llm import LLMChain
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_community.vectorstores import Chroma
from langchain_core.prompts import PromptTemplate
CONFIG_PATH = os.path.join('config', 'default_config.yaml')
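
# The YAML config is expected to provide at least the keys read below. The
# values shown are illustrative; the real ones live in config/default_config.yaml:
#
#   models:
#     model_cache_path: ./model_cache   # cache dir for the embedding model
#   chroma_db:
#     chroma_path: ./chroma             # persisted Chroma vector store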


def main():
    config = load_config()  # load at startup so a missing config fails fast; predict() reloads it per request
    title = "Ask a Johns Hopkins Librarian!"
    description = """
    This chat bot is an expert on the <a href="https://jscholarship.library.jhu.edu/handle/1774.2/35703">Edward St. John
    Real Estate Program - Practicum Projects</a> collection. It will answer any question regarding these papers!
    <img src="https://jscholarship.library.jhu.edu/assets/j10p/images/libraries.logo.small.horizontal.white.png"
         width="200">
    """
    article = """
    This Retrieval-Augmented Generation (RAG) chat bot is designed to answer questions only on the
    <a href="https://jscholarship.library.jhu.edu/handle/1774.2/35703">Edward St. John Real Estate Program - Practicum Projects</a> collection.
    """
    gr.Interface(
        fn=predict,
        inputs="text",
        outputs="text",
        title=title,
        description=description,
        article=article,
        examples=[
            ["What plans did Baltimore have to transition from an industrial city to tourism?"],
            ["What type of market analysis and feasibility studies were performed for the 9 St. Marys Street project in Annapolis Maryland?"],
            ["What are the feasibility studies of moving Johns Hopkins Medicine admin departments back to the East Baltimore campus?"],
        ],
    ).launch(share=True)


def predict(prompt):
    config = load_config()
    # "Stuff"-style RAG prompt: retrieved chunks fill {context}, the user's
    # question fills {question}.
    prompt_template = """
    Answer the question based only on the following context:
    {context}
    ---
    Answer the question based on the above context: {question}
    """
    # Embedding model; must match the one used when the Chroma collection was built.
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path'],
    )
    # Open the persisted vector store and retrieve the 7 most relevant chunks.
    db = Chroma(persist_directory=config['chroma_db']['chroma_path'],
                embedding_function=hf_embed_func,
                collection_name="jscholar_rag")
    results = db.similarity_search_with_relevance_scores(prompt, k=7)
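    # Not enabled in the original, but the relevance scores make it easy to drop
    # weak matches before building the context; the 0.3 cutoff is illustrative:
    #   results = [(doc, score) for doc, score in results if score >= 0.3]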
    context_text = "\n\n---\n\n".join(doc.page_content for doc, _score in results)
    # Zephyr-7B via the Hugging Face Inference API; expects a Hugging Face API
    # token (e.g. the HUGGINGFACEHUB_API_TOKEN environment variable).
    llm = HuggingFaceEndpoint(
        repo_id="HuggingFaceH4/zephyr-7b-beta",
        task="text-generation",
        top_k=30,
        temperature=0.1,
        repetition_penalty=1.03,
        max_new_tokens=512,
    )
    # input_variables must list the placeholder *names* used in the template.
    rag_prompt = PromptTemplate(
        input_variables=["context", "question"], template=prompt_template
    )
    chat_model = LLMChain(llm=llm, prompt=rag_prompt)
    response = chat_model.invoke({'question': prompt, 'context': context_text})
    return response.get('text')
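

# Not called at runtime: a minimal sketch of how the persisted "jscholar_rag"
# collection read by predict() might be (re)built. The `docs` argument (a list of
# langchain_core.documents.Document chunks) and any upstream loading/splitting
# are assumptions; the embedding model, collection name, and persist directory
# match the values used above.
def build_index(docs):
    config = load_config()
    hf_embed_func = HuggingFaceEmbeddings(
        model_name="all-MiniLM-L6-v2",
        model_kwargs={'device': 'cpu'},
        encode_kwargs={'normalize_embeddings': False},
        cache_folder=config['models']['model_cache_path'],
    )
    # Embed the chunks and write them to the same on-disk collection.
    Chroma.from_documents(
        documents=docs,
        embedding=hf_embed_func,
        collection_name="jscholar_rag",
        persist_directory=config['chroma_db']['chroma_path'],
    )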


def load_config():
    # Parse the YAML config shipped with the app.
    with open(CONFIG_PATH, 'r') as file:
        return yaml.safe_load(file)


if __name__ == "__main__":
    main()