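"""Simple RAG demo: upload PDF/DOCX files, parse and index them with LlamaIndex
(LlamaParse + OpenAI embeddings), and answer queries through a Gradio UI."""
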
import os
import shutil

from dotenv import load_dotenv
import gradio as gr

# from llama_index.llms.groq import Groq  # alternative LLM backend (currently unused)
from llama_index.llms.openai import OpenAI
from llama_index.core import (
    Settings,
    VectorStoreIndex,
    SimpleDirectoryReader,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.embeddings.openai import OpenAIEmbedding
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from llama_parse import LlamaParse

load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
# GROQ_API_KEY = os.getenv('GROQ_API_KEY')
LLAMAINDEX_API_KEY = os.getenv('LLAMAINDEX_API_KEY')

# llm = Groq(model="llama-3.1-70b-versatile", api_key=GROQ_API_KEY)
llm = OpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)
Settings.llm = llm

# set up embedding model
# embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")
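# OpenAIEmbedding defaults to OpenAI's ada-002 embedding model (8191-token
# input limit) and picks up OPENAI_API_KEY from the environment when no key
# is passed explicitly.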
embed_model = OpenAIEmbedding()
Settings.embed_model = embed_model

# create splitter
# NOTE: OpenAI embedding models accept at most 8191 tokens per input, so the
# original chunk_size=10000 would fail at embedding time; 1024 is a safer default.
splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=100)
Settings.transformations = [splitter]
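
# Module-level handle to the vector index; populated by process_documents()
index = None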

def upload_file(file_ls):
    """Copy uploaded files into ./data, replacing any previous upload."""
    UPLOAD_FOLDER = './data'
    # Clear out any previously uploaded files
    shutil.rmtree(UPLOAD_FOLDER, ignore_errors=True)
    os.makedirs(UPLOAD_FOLDER, exist_ok=True)
    for file in file_ls:
        shutil.copy(file, UPLOAD_FOLDER)
    gr.Info("File uploaded")

def process_documents():
    # create parser
    parser = LlamaParse(
        api_key=LLAMAINDEX_API_KEY, 
        result_type="markdown",  # "markdown" and "text" are available
        verbose=True,
    )

    # attach the source file name as metadata on every node
    filename_fn = lambda filename: {"file_name": filename}
    required_exts = [".pdf", ".docx"]
    file_extractor = {".pdf": parser}  # .docx falls back to the default reader
    reader = SimpleDirectoryReader(
        input_dir="./data",
        file_extractor=file_extractor,
        required_exts=required_exts,
        recursive=True,
        file_metadata=filename_fn
    )
    documents = reader.load_data()
    len_docs = len(documents)
    print(f"Creating index with {len_docs} documents")
    global index
    index = VectorStoreIndex.from_documents(documents, embed_model=embed_model, transformations=[splitter])
    index.storage_context.persist(persist_dir="./vectordb")
    return f"Processed {len_docs} documents successfully."
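
# Note: the index is persisted to ./vectordb above but never reloaded here, so
# it is lost on restart. A minimal sketch for reloading it (assuming the same
# Settings/embed model) would be:
#
#   from llama_index.core import StorageContext, load_index_from_storage
#   storage_context = StorageContext.from_defaults(persist_dir="./vectordb")
#   index = load_index_from_storage(storage_context)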

def query_index(query_input):
    # Guard against querying before any documents have been processed
    if index is None:
        return "Please upload and process documents first."

    # set up retriever
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=15,
        # vector_store_query_mode="mmr",
        # vector_store_kwargs={"mmr_threshold": 0.4}
    )

    # set up response synthesizer
    # response_synthesizer = get_response_synthesizer()

    # setting up query engine
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.53)],
        response_synthesizer=get_response_synthesizer(response_mode="tree_summarize", verbose=True),
    )
    # print(query_engine.get_prompts())

    output = query_engine.query(query_input)
    # The retrieved source nodes (for citations) are available via output.source_nodes
    return output.response

# Gradio interface
with gr.Blocks() as demo:
    gr.Markdown("# RAG with LlamaIndex")
    
    upload_button = gr.UploadButton("Click to upload a file", file_count="multiple")
    upload_button.upload(upload_file, upload_button)
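    # The .upload event passes the selected files to upload_file as a list of
    # local file paths (Gradio's default "filepath" type)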
    # Alternative file upload interface:
    # with gr.Row():
    #     docs = gr.Files(label="Upload Documents", file_types=[".txt", ".pdf"])
    
    # Process button
    process_button = gr.Button("Process Documents")
    
    # Output for document processing
    process_output = gr.Textbox(label="Processing Output")
    
    # Query interface
    query_input = gr.Textbox(label="Enter your query")
    query_button = gr.Button("Submit Query")
    query_output = gr.Textbox(label="Response")

    # Linking the processing function
    process_button.click(fn=process_documents, inputs=None, outputs=process_output)
    
    # Linking the query function
    query_button.click(fn=query_index, inputs=query_input, outputs=query_output)

# Run the interface
if __name__ == "__main__":
    demo.launch()