# RAG with LlamaIndex — Gradio app for uploading, indexing, and querying documents.
import os
import shutil

import gradio as gr
from dotenv import load_dotenv
from gradio.components import upload_button
from llama_index.core import (
    Settings,
    SimpleDirectoryReader,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.llms.groq import Groq
from llama_index.llms.openai import OpenAI
from llama_parse import LlamaParse
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# Load API keys from .env and configure the global LlamaIndex settings.
load_dotenv()

OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')
LLAMAINDEX_API_KEY = os.getenv('LLAMAINDEX_API_KEY')

# LLM used for answer synthesis.
llm = OpenAI(model="gpt-4o-mini", api_key=OPENAI_API_KEY)
Settings.llm = llm

# Embedding model used to vectorize document chunks.
embed_model = OpenAIEmbedding()
Settings.embed_model = embed_model

# Chunking strategy applied when documents are ingested.
splitter = SentenceSplitter(chunk_size=10000, chunk_overlap=100)
Settings.transformations = [splitter]
def upload_file(file_ls):
    """Copy user-uploaded files into a fresh ./data directory.

    Parameters
    ----------
    file_ls : list
        Paths of the files received from the Gradio UploadButton.
    """
    upload_folder = './data'
    # Start from a clean slate so documents from a previous upload are
    # not re-indexed; ignore_errors covers the "doesn't exist yet" case
    # without a bare except swallowing real failures.
    shutil.rmtree(upload_folder, ignore_errors=True)
    os.makedirs(upload_folder, exist_ok=True)
    for file in file_ls:
        shutil.copy(file, upload_folder)
    gr.Info("File uploaded")
def process_documents():
    """Parse, embed, and index every document under ./data.

    PDFs are parsed via LlamaParse (markdown output); .docx files go
    through the default reader. The resulting VectorStoreIndex is stored
    in the module-level ``index`` global and persisted to ./vectordb.

    Returns
    -------
    str
        Status message with the number of documents processed.
    """
    # LlamaParse converts PDFs to markdown for cleaner chunking.
    parser = LlamaParse(
        api_key=LLAMAINDEX_API_KEY,
        result_type="markdown",  # "markdown" and "text" are available
        verbose=True,
    )
    reader = SimpleDirectoryReader(
        input_dir="./data",
        file_extractor={".pdf": parser},
        required_exts=[".pdf", ".docx"],
        recursive=True,
        # Attach the source filename to each document's metadata.
        file_metadata=lambda filename: {"file_name": filename},
    )
    documents = reader.load_data()
    len_docs = len(documents)
    # BUG FIX: print() does no %-interpolation — the original printed the
    # format string and the count as two separate arguments.
    print(f"index creating with {len_docs} documents")
    global index
    index = VectorStoreIndex.from_documents(
        documents, embed_model=embed_model, transformations=[splitter]
    )
    # Persist so the index can be reloaded without re-embedding.
    index.storage_context.persist(persist_dir="./vectordb")
    # BUG FIX: dropped the stray duplicated count that trailed the message.
    return f"Processed {len_docs} documents successfully."
def query_index(query_input):
    """Answer a natural-language query against the built index.

    Parameters
    ----------
    query_input : str
        Question typed into the UI.

    Returns
    -------
    str
        The synthesized answer text, or a hint if no index exists yet.
    """
    # Robustness: querying before "Process Documents" used to raise a raw
    # NameError on the global `index`; return a friendly message instead.
    if "index" not in globals():
        return "No index available - please upload and process documents first."
    retriever = VectorIndexRetriever(
        index=index,
        similarity_top_k=15,
        # vector_store_query_mode="mmr",
        # vector_store_kwargs={"mmr_threshold": 0.4}
    )
    query_engine = RetrieverQueryEngine(
        retriever=retriever,
        # Drop weakly-related chunks before synthesis.
        node_postprocessors=[SimilarityPostprocessor(similarity_cutoff=0.53)],
        response_synthesizer=get_response_synthesizer(
            response_mode="tree_summarize", verbose=True
        ),
    )
    output = query_engine.query(query_input)
    return output.response
# ---------------------------------------------------------------------------
# Gradio interface: upload -> process -> query.
# ---------------------------------------------------------------------------
with gr.Blocks() as demo:
    gr.Markdown("# RAG with Llamaindex")

    # Upload widget: copies the selected files into ./data.
    upload_button = gr.UploadButton("Click to upload a file", file_count="multiple")
    upload_button.upload(upload_file, upload_button)

    # Ingestion trigger and its status display.
    process_button = gr.Button("Process Documents")
    process_output = gr.Textbox(label="Processing Output")

    # Query widgets.
    query_input = gr.Textbox(label="Enter your query")
    query_button = gr.Button("Submit Query")
    query_output = gr.Textbox(label="Response")

    # Wire the buttons to their handlers.
    process_button.click(fn=process_documents, inputs=None, outputs=process_output)
    query_button.click(fn=query_index, inputs=query_input, outputs=query_output)

# Run the interface.
if __name__ == "__main__":
    demo.launch()