from transformers import AutoModelForCausalLM, AutoTokenizer, TextStreamer, pipeline
from langchain.llms import HuggingFaceHub, HuggingFacePipeline
from dotenv import load_dotenv
from langchain.embeddings import HuggingFaceBgeEmbeddings
from langchain.vectorstores import Chroma
from langchain.chains import RetrievalQA
import textwrap
import os
def load_vector_store():
    # BGE embeddings on CPU; normalized so similarity search behaves well.
    model_name = "BAAI/bge-small-en"
    model_kwargs = {"device": "cpu"}
    encode_kwargs = {"normalize_embeddings": True}
    embeddings = HuggingFaceBgeEmbeddings(
        model_name=model_name, model_kwargs=model_kwargs, encode_kwargs=encode_kwargs
    )
    print('Embeddings loaded!')
    # Open the previously persisted Chroma store.
    vector_store = Chroma(persist_directory='vector stores/ncertdb', embedding_function=embeddings)
    print('Vector store loaded!')
    # Retrieve the top-2 most similar chunks per query.
    retriever = vector_store.as_retriever(search_kwargs={"k": 2})
    return retriever
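
# A minimal sketch of how the persisted store above could have been built.
# The PDF path, loader, and chunk sizes here are assumptions; swap in
# whatever documents 'vector stores/ncertdb' was actually built from.
from langchain.document_loaders import PyPDFLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter

def build_vector_store():
    # Load and chunk the source document (hypothetical path).
    docs = PyPDFLoader('data/ncert.pdf').load()
    chunks = RecursiveCharacterTextSplitter(
        chunk_size=1000, chunk_overlap=100
    ).split_documents(docs)
    # Embed with the same BGE model used at query time, then persist to disk.
    embeddings = HuggingFaceBgeEmbeddings(
        model_name="BAAI/bge-small-en",
        model_kwargs={"device": "cpu"},
        encode_kwargs={"normalize_embeddings": True},
    )
    db = Chroma.from_documents(chunks, embeddings, persist_directory='vector stores/ncertdb')
    db.persist()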
# Model: hosted inference via the HuggingFace Hub
def load_model():
    # load_dotenv() reads HUGGINGFACEHUB_API_TOKEN from .env into the
    # environment, where HuggingFaceHub picks it up automatically.
    load_dotenv()
    repo_id = 'llmware/bling-sheared-llama-1.3b-0.1'
    llm = HuggingFaceHub(
        repo_id=repo_id,
        model_kwargs={'max_new_tokens': 100}
    )
    print(llm('HI!'))  # quick smoke test that the endpoint responds
    return llm
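
# Alternative sketch using the otherwise-unused transformers imports above:
# running the same checkpoint locally through a text-generation pipeline
# instead of the hosted Hub endpoint. Parameters here are illustrative.
def load_local_model():
    repo_id = 'llmware/bling-sheared-llama-1.3b-0.1'
    tokenizer = AutoTokenizer.from_pretrained(repo_id)
    model = AutoModelForCausalLM.from_pretrained(repo_id)
    pipe = pipeline(
        'text-generation',
        model=model,
        tokenizer=tokenizer,
        max_new_tokens=100,
        streamer=TextStreamer(tokenizer, skip_prompt=True),  # print tokens as they generate
    )
    return HuggingFacePipeline(pipeline=pipe)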
def qa_chain():
    retriever = load_vector_store()
    llm = load_model()
    # 'stuff' packs all retrieved chunks into a single prompt for the LLM.
    qa = RetrievalQA.from_chain_type(
        llm=llm,
        chain_type='stuff',
        retriever=retriever,
        return_source_documents=True,
        verbose=True
    )
    return qa
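
# Optional sketch: the default 'stuff' prompt can be overridden through
# chain_type_kwargs; the template wording below is an assumption.
from langchain.prompts import PromptTemplate

def qa_chain_with_prompt():
    prompt = PromptTemplate(
        template=(
            "Use the following context to answer the question.\n"
            "{context}\n"
            "Question: {question}\n"
            "Answer:"
        ),
        input_variables=['context', 'question'],
    )
    return RetrievalQA.from_chain_type(
        llm=load_model(),
        chain_type='stuff',
        retriever=load_vector_store(),
        chain_type_kwargs={'prompt': prompt},
        return_source_documents=True,
    )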
def wrap_text_preserve_newlines(text, width=110):
    # Split the input text into lines based on newline characters
    lines = text.split('\n')
    # Wrap each line individually
    wrapped_lines = [textwrap.fill(line, width=width) for line in lines]
    # Join the wrapped lines back together using newline characters
    wrapped_text = '\n'.join(wrapped_lines)
    return wrapped_text
def process_llm_response(llm_response):
    print(wrap_text_preserve_newlines(llm_response['result']))
    print('\n\nSources:')
    for source in llm_response["source_documents"]:
        print(source.metadata['source'])
# Build the chain once, then run a sample query.
qa = qa_chain()
response = qa('What are the types of embedded systems?')
process_llm_response(response)
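
# Usage sketch: keep asking follow-up questions interactively (Ctrl+C to exit).
while True:
    query = input('\nQuery: ')
    process_llm_response(qa(query))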