import argparse
import os
from dataclasses import dataclass
import chromadb
import yaml
from langchain.chains.llm import LLMChain
from langchain.vectorstores.chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
CONFIG_PATH = os.path.join('config', 'default_config.yaml')
CHROMA_PATH = "chroma"
MODEL_CACHE = "model_cache"
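# Prompt template that restricts the model to answering from the retrieved context.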
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""
def main():
# Create CLI.
parser = argparse.ArgumentParser()
parser.add_argument("query_text", type=str, help="The query text.")
args = parser.parse_args()
query_text = args.query_text
# Prepare the DB.
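    # The embedding model must match the one used when the Chroma index was built.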
hf_embed_func = HuggingFaceEmbeddings(
model_name="all-MiniLM-L6-v2",
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': False},
cache_folder=MODEL_CACHE
)
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=hf_embed_func, collection_name="jscholar_rag")
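    # Open the persisted collection directly with the chromadb client to report its size.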
client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = client.get_collection(name="jscholar_rag")
print(f"Total Embeddings: {collection.count()}")
print(collection.peek())
# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=5)
# results = db.similarity_search(query_text)
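    # Give up if nothing came back or the top hit falls below the relevance threshold.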
if len(results) == 0 or results[0][1] < 0.1:
print(f"Unable to find matching results.")
return
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
# prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = PromptTemplate(
        input_variables=["context", "question"], template=PROMPT_TEMPLATE
    )
#prompt = prompt_template.format(context=context_text, question=query_text)
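    # Generate the answer with zephyr-7b-beta via the Hugging Face endpoint wrapper.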
llm = HuggingFaceEndpoint(
repo_id="HuggingFaceH4/zephyr-7b-beta",
task="text-generation",
top_k=30,
temperature=0.1,
repetition_penalty=1.03,
max_new_tokens=512,
)
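    # Chain the prompt and LLM, then fill in the retrieved context and the user's question.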
    chain = LLMChain(prompt=prompt, llm=llm)
    response_text = chain.invoke({'question': query_text, 'context': context_text})
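    # Pull source identifiers out of each retrieved chunk's metadata for citation.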
sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = response_text.get("text")
formatted_sources = f"Citations: {sources}"
print(formatted_response)
print(formatted_sources)
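# Helper that reads settings from the YAML config file (currently not called from main()).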
def load_config():
with open(CONFIG_PATH, 'r') as file:
loaded_data = yaml.safe_load(file)
return loaded_data
if __name__ == "__main__":
main()