NoteBot / app.py
I-AdityaGoyal's picture
Upload 8 files
1272fb9 verified
import streamlit as st
import os
from pdf_processing import extract_text_from_pdf
from youtube_processing import extract_text_from_youtube
from faiss_indexing import get_embeddings, create_faiss_index, query_faiss_index
from utils import load_environment_variables, query_huggingface_api, chunk_text
from pdf_generator import generate_pdf
from text_to_speech import speak_text
from sentence_transformers import SentenceTransformer
# Fetch the Hugging Face API token. Every request made further down sends it
# in the Authorization header, so when it is absent show the problem in the
# UI and end this script run right away.
hf_token = load_environment_variables()
if not hf_token:
    st.error("Hugging Face API token is missing. Please add it to your .env file.")
    st.stop()
# Hugging Face Inference API endpoint used to generate answers, plus the
# bearer-token header sent with each request.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
headers = {
    "Authorization": f"Bearer {hf_token}"
}

# Name of the sentence-transformers model used to embed chunks and queries.
model_name = 'all-MiniLM-L6-v2'


@st.cache_resource
def _load_embedding_model(name):
    """Return the SentenceTransformer called *name*, constructed only once.

    Streamlit re-executes this whole script on every widget interaction.
    Caching the model as a shared resource avoids rebuilding it on each
    rerun; later reruns receive the same instance.
    """
    return SentenceTransformer(name)


# Initialize the sentence transformer model (cached across reruns).
model = _load_embedding_model(model_name)
# Page header and the two content sources the user can provide.
st.title("NoteBot - Notes Retrieval System")
st.write("By - Aditya Goyal")
st.write("Upload PDFs or provide YouTube links to ask questions about their content.")

# Any number of PDF files may be uploaded at once.
uploaded_files = st.file_uploader(
    label="Upload PDF files",
    type="pdf",
    accept_multiple_files=True,
)
# A single, optional YouTube URL.
youtube_url = st.text_input(label="Enter YouTube video URL:")

# Pool of text chunks gathered from every source during this script run.
all_chunks = []
# Process PDF files: save each upload under ./temp, extract its text, split
# the text into chunks and add them to the shared chunk pool.
if uploaded_files:
    # Create the working directory once, up front. exist_ok=True avoids the
    # check-then-create race of testing os.path.exists() first.
    os.makedirs("temp", exist_ok=True)
    for uploaded_file in uploaded_files:
        # The file name comes from the upload itself, so keep only its final
        # path component; a name such as "../../x.pdf" must not be able to
        # write outside the temp directory.
        safe_name = os.path.basename(uploaded_file.name)
        pdf_path = os.path.join("temp", safe_name)
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        text = extract_text_from_pdf(pdf_path)
        chunks = chunk_text(text)
        all_chunks.extend(chunks)
# Process YouTube video: when a URL was entered, extract the video's text,
# split it into chunks and append those to the shared chunk pool.
if youtube_url:
    yt_text = extract_text_from_youtube(youtube_url)
    yt_chunks = chunk_text(yt_text)
    all_chunks += yt_chunks
# Once at least one chunk is available: index the chunks, accept a question,
# retrieve the most similar chunks, ask the hosted model for an answer and
# offer PDF export / text-to-speech for that answer.
if all_chunks:
    embeddings = get_embeddings(all_chunks, model)
    faiss_index = create_faiss_index(embeddings)

    query_text = st.text_input("Enter your query:")
    if query_text:
        query_embedding = get_embeddings([query_text], model)
        distances, indices = query_faiss_index(faiss_index, query_embedding)

        # FAISS fills result slots with -1 when the index holds fewer vectors
        # than were requested. Used directly as a list index, -1 would
        # silently select the LAST chunk, so drop anything out of range.
        similar_chunks = [
            all_chunks[i] for i in indices[0] if 0 <= i < len(all_chunks)
        ]

        # Ensure we only use a manageable number of chunks (slicing already
        # copes with lists shorter than the limit).
        max_chunks_to_use = 5
        selected_chunks = similar_chunks[:max_chunks_to_use]

        template = (
            "Based on the following chunks: {similar_chunks}\n"
            "Question: {question}\n"
            "Answer:"
        )
        prompt_text = template.format(
            similar_chunks="\n".join(selected_chunks),
            question=query_text,
        )

        # Generate response from Hugging Face API.
        # Streamlit re-runs this script on every button click below. Without
        # remembering the answer, each click would call the API again and the
        # text exported or spoken could differ from the text displayed.
        # Only successful answers are remembered so a failed call is retried.
        cached_answer = st.session_state.get("notebot_last_answer")
        if cached_answer is not None and cached_answer[0] == prompt_text:
            response = cached_answer[1]
        else:
            response = query_huggingface_api(prompt_text, API_URL, headers)
            if "Error" not in response:
                st.session_state["notebot_last_answer"] = (prompt_text, response)

        # NOTE(review): failure is detected by the substring "Error", so a
        # genuine answer containing that word is shown as an error. Confirm
        # the return contract of query_huggingface_api before tightening this.
        if "Error" not in response:
            st.write("**Answer:**", response)

            # Add button to download response as PDF
            if st.button("Download Response as PDF"):
                # The temp directory is otherwise only created when a PDF was
                # uploaded; make sure it exists for YouTube-only sessions.
                os.makedirs("temp", exist_ok=True)
                pdf_path = os.path.join("temp", "response.pdf")
                generate_pdf(response, pdf_path)
                with open(pdf_path, "rb") as f:
                    pdf_bytes = f.read()
                st.download_button(
                    label="Download PDF",
                    data=pdf_bytes,
                    file_name="response.pdf",
                    mime="application/pdf",
                )

            # Add button to speak the response text
            if st.button("Speak Response"):
                speak_text(response)
        else:
            st.error(response)