# Streamlit app: upload a PDF and query it with llama-index (Hugging Face models).
# Third-party dependencies: Streamlit for the UI, llama-index for RAG,
# PyPDF2 for text extraction.
import streamlit as st
from llama_index import ServiceContext, VectorStoreIndex
from llama_index.embeddings import HuggingFaceEmbedding
from llama_index.llms import HuggingFaceInferenceAPI
from llama_index.schema import Document
from PyPDF2 import PdfReader

# Page header and usage notes.
st.title("PDF querying using Llama-Index by Rahul Bhoyar")
st.write("Base Model: **HuggingFaceH4/zephyr-7b-alpha (open-source from HuggingFace)**")
st.write("Embedding Model: **WhereIsAI/UAE-Large-V1 (open-source from HuggingFace)**")
st.write("This app allows you to upload your own PDF and query your document.")

# Hugging Face API token; consumed by HuggingFaceInferenceAPI further down.
hf_token = st.text_input("Enter your Hugging Face token:")
def read_pdf(uploaded_file):
    """Extract and concatenate the text of every page of a PDF.

    Args:
        uploaded_file: A file-like object containing PDF bytes (e.g. a
            Streamlit ``UploadedFile``).

    Returns:
        str: Text of all pages joined together; ``""`` when no text is
        extractable.
    """
    pdf_reader = PdfReader(uploaded_file)
    # extract_text() returns None for pages with no extractable text
    # (e.g. scanned images); substitute "" so concatenation never raises.
    # "".join over a generator avoids the quadratic += string build.
    return "".join(page.extract_text() or "" for page in pdf_reader.pages)
# --- Upload, index, and query flow -------------------------------------
# Streamlit reruns the whole script on every interaction, so plain flags
# are enough to gate the query UI until an index exists.
success = False
query_engine_creation = False

uploaded_pdf = st.file_uploader("Upload your PDF", type=['pdf'])

if uploaded_pdf is not None:
    # Pull the raw text out of the PDF and wrap it as one Document.
    documents = [Document(text=read_pdf(uploaded_pdf))]
    st.success("Documents loaded successfully!")

    # LLM selection; the chosen model id is passed straight to the
    # HuggingFace inference API together with the user's token.
    model = st.selectbox('Select the model', ('google/flan-t5-xxl','HuggingFaceH4/zephyr-7b-alpha'), index=0)
    llm = HuggingFaceInferenceAPI(model_name=model, token=hf_token)

    with st.spinner('Creating Vector Embeddings...'):
        # Embed with UAE-Large-V1 and build an in-memory vector index,
        # persisting it to the default storage directory afterwards.
        embed_model_uae = HuggingFaceEmbedding(model_name="WhereIsAI/UAE-Large-V1")
        service_context = ServiceContext.from_defaults(
            llm=llm, chunk_size=800, chunk_overlap=20, embed_model=embed_model_uae
        )
        index = VectorStoreIndex.from_documents(documents, service_context=service_context, show_progress=True)
        index.storage_context.persist()
        query_engine = index.as_query_engine()
        query_engine_creation = True

    st.success("Vector embeddings created.")
    success = True
else:
    st.write("Please upload a file first.")

# Query UI — shown only after the engine was built on this rerun.
if query_engine_creation and success:
    user_query = st.text_input("Enter your query:")
    if user_query:
        with st.spinner('Fetching the response...'):
            response = query_engine.query(user_query)
            st.markdown(f"**Response:** {response}")