import argparse
import os
from dataclasses import dataclass
import chromadb
import yaml
from langchain.chains.llm import LLMChain
from langchain.vectorstores.chroma import Chroma
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.llms.huggingface_endpoint import HuggingFaceEndpoint
from langchain_core.prompts import PromptTemplate
CONFIG_PATH = os.path.join('config', 'default_config.yaml')
CHROMA_PATH = "chroma"
MODEL_CACHE = "model_cache"
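# Prompt template that restricts the model to answering from the retrieved context.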
PROMPT_TEMPLATE = """
Answer the question based only on the following context:
{context}
---
Answer the question based on the above context: {question}
"""
def main():
# Create CLI.
parser = argparse.ArgumentParser()
parser.add_argument("query_text", type=str, help="The query text.")
args = parser.parse_args()
query_text = args.query_text
# Prepare the DB.
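    # The embedding model must match the one used when the Chroma index was built.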
hf_embed_func = HuggingFaceEmbeddings(
model_name="all-MiniLM-L6-v2",
model_kwargs={'device': 'cpu'},
encode_kwargs={'normalize_embeddings': False},
cache_folder=MODEL_CACHE
)
db = Chroma(persist_directory=CHROMA_PATH, embedding_function=hf_embed_func, collection_name="jscholar_rag")
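    # Open the persisted collection directly with the chromadb client to report its size.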
client = chromadb.PersistentClient(path=CHROMA_PATH)
collection = client.get_collection(name="jscholar_rag")
print(f"Total Embeddings: {collection.count()}")
print(collection.peek())
# Search the DB.
results = db.similarity_search_with_relevance_scores(query_text, k=5)
# results = db.similarity_search(query_text)
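    # Give up if nothing came back or the top hit falls below the relevance threshold.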
if len(results) == 0 or results[0][1] < 0.1:
print(f"Unable to find matching results.")
return
context_text = "\n\n---\n\n".join([doc.page_content for doc, _score in results])
# prompt_template = ChatPromptTemplate.from_template(PROMPT_TEMPLATE)
    prompt = PromptTemplate(
        input_variables=["context", "question"], template=PROMPT_TEMPLATE
    )
#prompt = prompt_template.format(context=context_text, question=query_text)
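    # Generate the answer with zephyr-7b-beta via the Hugging Face endpoint wrapper.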
llm = HuggingFaceEndpoint(
repo_id="HuggingFaceH4/zephyr-7b-beta",
task="text-generation",
top_k=30,
temperature=0.1,
repetition_penalty=1.03,
max_new_tokens=512,
)
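    # Chain the prompt and LLM, then fill in the retrieved context and the user's question.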
    chain = LLMChain(prompt=prompt, llm=llm)
    response_text = chain.invoke({'question': query_text, 'context': context_text})
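    # Pull source identifiers out of each retrieved chunk's metadata for citation.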
sources = [doc.metadata.get("source", None) for doc, _score in results]
    formatted_response = response_text.get("text")
formatted_sources = f"Citations: {sources}"
print(formatted_response)
print(formatted_sources)
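# Helper that reads settings from the YAML config file (currently not called from main()).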
def load_config():
with open(CONFIG_PATH, 'r') as file:
loaded_data = yaml.safe_load(file)
return loaded_data
if __name__ == "__main__":
main()