Spaces:
Runtime error
Runtime error
File size: 2,218 Bytes
f3d0f1e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 |
from langchain_community.document_loaders import DataFrameLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain
import os
#from dotenv import load_dotenv
# Load environment variables from the .env file
#load_dotenv()
# Load documents to create a vectorstore later
def load_documents(df, *, page_content_column='speech_content',
                   chunk_size=1024, chunk_overlap=32):
    """Load a DataFrame as LangChain documents and split them into chunks.

    The DataFrame is read with ``DataFrameLoader`` using
    ``page_content_column`` as the document text, and the loaded documents
    are then split with a ``RecursiveCharacterTextSplitter`` so they can be
    embedded into a vector store later.

    Args:
        df: DataFrame to load; expected to contain ``page_content_column``.
        page_content_column: Name of the column that holds the document
            text. Defaults to ``'speech_content'``.
        chunk_size: Maximum size of a chunk, measured with ``len``.
            Defaults to 1024.
        chunk_overlap: Overlap between consecutive chunks, measured with
            ``len``. Defaults to 32.

    Returns:
        The split documents produced by the text splitter.
    """
    # TODO: Create one initial vector store loading all the documents with
    # this function.
    loader = DataFrameLoader(data_frame=df,
                             page_content_column=page_content_column)
    data = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        length_function=len,
        # Separators are treated as literal strings, not regex patterns.
        is_separator_regex=False,
    )
    return splitter.split_documents(documents=data)
def get_vectorstore(embeddings, folder_path, index_name):
    """Load a saved FAISS vector store from disk.

    Args:
        embeddings: Embedding object handed to ``FAISS.load_local``.
        folder_path: Folder that holds the saved index.
        index_name: Name of the index inside ``folder_path``.

    Returns:
        The vector store returned by ``FAISS.load_local``.
    """
    # Echo the location of the index that is about to be loaded.
    print("/".join((folder_path, index_name)))

    # TODO: Dynamically update and merge vector stores, e.g. build the index
    # from documents and save it when no saved index exists yet.

    # NOTE(review): allow_dangerous_deserialization=True is passed to
    # load_local; presumably the index folder is trusted -- confirm before
    # pointing this at files from an external source.
    return FAISS.load_local(
        folder_path=folder_path,
        index_name=index_name,
        embeddings=embeddings,
        allow_dangerous_deserialization=True,
    )
# Apply RAG by providing the context and the question to the LLM using the predefined template
def RAG(llm, prompt, db, question):
    """Run a retrieval chain over ``db`` for the given question.

    A stuff-documents chain is created from ``llm`` and ``prompt``, combined
    with the retriever obtained from ``db.as_retriever()``, and the resulting
    retrieval chain is invoked with the question.

    Args:
        llm: Language model passed to ``create_stuff_documents_chain``.
        prompt: Prompt template passed to ``create_stuff_documents_chain``.
        db: Vector store; its ``as_retriever()`` supplies the retriever.
        question: Question sent to the chain under the ``"input"`` key.

    Returns:
        Whatever the retrieval chain's ``invoke`` call returns.
    """
    combine_docs_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    chain = create_retrieval_chain(db.as_retriever(), combine_docs_chain)
    return chain.invoke({"input": question})
|