# Spaces: Runtime error  (page-scrape header residue; not part of the module)
from langchain_community.document_loaders import DataFrameLoader | |
from langchain_community.embeddings import HuggingFaceEmbeddings | |
from langchain_core.prompts import ChatPromptTemplate | |
from langchain_community.vectorstores import FAISS | |
from langchain_community.llms import HuggingFaceHub | |
from langchain.text_splitter import RecursiveCharacterTextSplitter | |
from langchain.chains.combine_documents import create_stuff_documents_chain | |
from langchain.chains import create_retrieval_chain | |
import os | |
# from dotenv import load_dotenv
# Load environment variables from the .env file
# load_dotenv()
# Load documents to create a vector store later
def load_documents(df):
    """Turn a DataFrame of speeches into chunked documents for a vector store.

    The text of each document is read from the ``speech_content`` column of
    ``df``. The loaded documents are then split into chunks of up to 1024
    characters (measured with ``len``) with a 32-character overlap.

    Args:
        df: DataFrame that contains a ``speech_content`` column.

    Returns:
        The chunked documents produced by
        ``RecursiveCharacterTextSplitter.split_documents``.
    """
    # TODO: Create one initial vector store loading all the documents with
    # this function.
    # NOTE: for an unprocessed CSV file, a CSVLoader with
    # source_column="speech_content" was considered instead of the
    # DataFrameLoader used here.
    chunker = RecursiveCharacterTextSplitter(
        chunk_size=1024,
        chunk_overlap=32,
        length_function=len,
        is_separator_regex=False,
    )
    speeches = DataFrameLoader(
        data_frame=df, page_content_column='speech_content'
    ).load()
    return chunker.split_documents(documents=speeches)
def get_vectorstore(embeddings, folder_path, index_name):
    """Load a previously saved FAISS vector store from disk.

    Args:
        embeddings: Embedding model passed through to ``FAISS.load_local``.
        folder_path: Directory that holds the saved index.
        index_name: Name of the index inside ``folder_path``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # Print "<folder_path>/<index_name>" so the loaded index is visible in
    # the logs.
    print("/".join((folder_path, index_name)))

    # TODO: Dynamically update and merge vector stores. The intended flow is:
    # if the index already exists on disk, load it (current behaviour);
    # otherwise build it with FAISS.from_documents(documents, embeddings) and
    # persist it with save_local(folder_path=..., index_name=...).

    # NOTE(review): allow_dangerous_deserialization=True opts in to loading
    # the pickled part of the saved index; only point this at index files
    # from a trusted source.
    return FAISS.load_local(
        folder_path=folder_path,
        index_name=index_name,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
# Apply RAG: pass the retrieved context and the question to the LLM using the predefined prompt template
def RAG(llm, prompt, db, question):
    """Answer ``question`` with retrieval-augmented generation.

    A stuff-documents chain is built from ``llm`` and ``prompt``, combined
    with a retriever obtained from ``db``, and the resulting retrieval chain
    is invoked with the question under the ``"input"`` key.

    Args:
        llm: Language model handed to ``create_stuff_documents_chain``.
        prompt: Prompt template handed to ``create_stuff_documents_chain``.
        db: Vector store; its ``as_retriever()`` supplies the retriever.
        question: The user question to answer.

    Returns:
        Whatever the retrieval chain's ``invoke`` returns for the question.
    """
    answer_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    rag_chain = create_retrieval_chain(db.as_retriever(), answer_chain)
    return rag_chain.invoke({"input": question})