from langchain_community.document_loaders import DataFrameLoader
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_core.prompts import ChatPromptTemplate
from langchain_community.vectorstores import FAISS
from langchain_community.llms import HuggingFaceHub
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.chains import create_retrieval_chain

import os
# from dotenv import load_dotenv

# Load environment variables from a .env file
# load_dotenv()


# Load documents from a DataFrame and split them into chunks for the vector store
def load_documents(df):
    # To do: create one initial vector store loading all the documents with this function
    # loader = CSVLoader(index_name, source_column="speech_content")  # unprocessed CSV file
    loader = DataFrameLoader(data_frame=df, page_content_column="speech_content")
    data = loader.load()
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=1024,
        chunk_overlap=32,
        length_function=len,
        is_separator_regex=False,
    )
    documents = splitter.split_documents(documents=data)
    return documents
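
# A minimal input sketch for load_documents (the column names below are
# assumptions; only 'speech_content' is required, and every other column
# ends up in the Document metadata via DataFrameLoader):
#
# import pandas as pd
# df = pd.DataFrame({
#     "speaker": ["Alice", "Bob"],
#     "speech_content": ["First speech text ...", "Second speech text ..."],
# })
# documents = load_documents(df)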

# Load a persisted FAISS index from disk, or build and save one from the
# given documents if no index exists yet
def get_vectorstore(embeddings, folder_path, index_name, documents=None):
    path = os.path.join(folder_path, index_name)
    # To do: dynamically update and merge vector stores
    # FAISS.save_local writes '<index_name>.faiss' and '<index_name>.pkl'
    if os.path.exists(path + ".faiss"):
        db = FAISS.load_local(folder_path=folder_path, index_name=index_name,
                              embeddings=embeddings, allow_dangerous_deserialization=True)
    else:
        db = FAISS.from_documents(documents, embeddings)
        db.save_local(folder_path=folder_path, index_name=index_name)
    return db

# Apply RAG: retrieve relevant context from the vector store and pass it,
# together with the question, to the LLM via the predefined prompt template
def RAG(llm, prompt, db, question):
    document_chain = create_stuff_documents_chain(llm=llm, prompt=prompt)
    retriever = db.as_retriever()
    retrieval_chain = create_retrieval_chain(retriever, document_chain)

    response = retrieval_chain.invoke({"input": question})
    return response
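
# End-to-end usage (a minimal sketch; the embedding model, Hub repo id,
# index location, and question below are assumptions, not part of this module):
#
# import pandas as pd
#
# df = pd.read_csv("speeches.csv")  # must contain a 'speech_content' column
# documents = load_documents(df)
#
# embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
# db = get_vectorstore(embeddings, folder_path="faiss_index",
#                      index_name="speeches", documents=documents)
#
# llm = HuggingFaceHub(repo_id="mistralai/Mistral-7B-Instruct-v0.2",
#                      model_kwargs={"temperature": 0.1, "max_new_tokens": 512})
# prompt = ChatPromptTemplate.from_template(
#     "Answer the question using only the following context.\n\n"
#     "Context:\n{context}\n\nQuestion: {input}"
# )
#
# response = RAG(llm, prompt, db, "What was said about climate policy?")
# print(response["answer"])  # create_retrieval_chain returns a dict with an 'answer' key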