# NoteBot — Streamlit app for querying uploaded PDFs and YouTube transcripts.
"""NoteBot - ask questions about PDF and YouTube content.

Uploaded PDFs and/or a YouTube transcript are split into chunks, embedded
with a SentenceTransformer model, and indexed with FAISS.  The chunks most
similar to the user's query are stuffed into a prompt and sent to the
Hugging Face Inference API (Mistral-7B-Instruct); the answer can then be
downloaded as a PDF or read aloud.
"""

import os

import streamlit as st
from sentence_transformers import SentenceTransformer

from faiss_indexing import get_embeddings, create_faiss_index, query_faiss_index
from pdf_generator import generate_pdf
from pdf_processing import extract_text_from_pdf
from text_to_speech import speak_text
from utils import load_environment_variables, query_huggingface_api, chunk_text
from youtube_processing import extract_text_from_youtube

# Scratch directory for uploaded PDFs and the generated answer PDF.
TEMP_DIR = "temp"

# Load environment variables.  Without a Hugging Face token the Inference
# API cannot be called, so fail fast with a visible error.
hf_token = load_environment_variables()
if not hf_token:
    st.error("Hugging Face API token is missing. Please add it to your .env file.")
    st.stop()

# Hugging Face Inference API endpoint used to generate answers.
API_URL = "https://api-inference.huggingface.co/models/mistralai/Mistral-7B-Instruct-v0.2"
headers = {
    "Authorization": f"Bearer {hf_token}"
}

# Sentence-transformer model used to embed both document chunks and queries.
model_name = 'all-MiniLM-L6-v2'
model = SentenceTransformer(model_name)

# ---------------------------------------------------------------- UI ----
st.title("NoteBot - Notes Retrieval System")
st.write("By - Aditya Goyal")
st.write("Upload PDFs or provide YouTube links to ask questions about their content.")

uploaded_files = st.file_uploader("Upload PDF files", type="pdf", accept_multiple_files=True)
youtube_url = st.text_input("Enter YouTube video URL:")

# All text chunks from every source, in ingestion order; FAISS indices
# returned below map back into this list.
all_chunks = []

# -------------------------------------------------- ingest PDF files ----
if uploaded_files:
    # Create the scratch directory once, up front (exist_ok avoids the
    # check-then-create race the original had inside the loop).
    os.makedirs(TEMP_DIR, exist_ok=True)
    for uploaded_file in uploaded_files:
        pdf_path = os.path.join(TEMP_DIR, uploaded_file.name)
        with open(pdf_path, "wb") as f:
            f.write(uploaded_file.getbuffer())
        text = extract_text_from_pdf(pdf_path)
        all_chunks.extend(chunk_text(text))

# -------------------------------------------- ingest YouTube transcript ----
if youtube_url:
    yt_text = extract_text_from_youtube(youtube_url)
    all_chunks.extend(chunk_text(yt_text))

if all_chunks:
    # Embed every chunk and build the FAISS index for similarity search.
    embeddings = get_embeddings(all_chunks, model)
    faiss_index = create_faiss_index(embeddings)

    query_text = st.text_input("Enter your query:")
    if query_text:
        query_embedding = get_embeddings([query_text], model)
        distances, indices = query_faiss_index(faiss_index, query_embedding)
        similar_chunks = [all_chunks[i] for i in indices[0]]

        # Keep the prompt a manageable size: at most 5 retrieved chunks.
        selected_chunks = similar_chunks[:5]

        template = """Based on the following chunks: {similar_chunks}
Question: {question}
Answer:"""
        prompt_text = template.format(
            similar_chunks="\n".join(selected_chunks), question=query_text
        )

        # Generate a response via the Hugging Face Inference API.
        # NOTE(review): failure appears to be signalled by a string
        # containing "Error" -- confirm against query_huggingface_api
        # in utils.py.
        response = query_huggingface_api(prompt_text, API_URL, headers)
        if "Error" not in response:
            st.write("**Answer:**", response)

            # Offer the answer as a one-click PDF download.  The PDF is
            # generated unconditionally: the original nested
            # st.download_button inside st.button, which needs two clicks
            # across reruns and loses the generated file in between.
            os.makedirs(TEMP_DIR, exist_ok=True)
            pdf_path = os.path.join(TEMP_DIR, "response.pdf")
            generate_pdf(response, pdf_path)
            with open(pdf_path, "rb") as f:
                st.download_button(
                    label="Download Response as PDF",
                    data=f,
                    file_name="response.pdf",
                )

            # Read the answer aloud on demand.
            if st.button("Speak Response"):
                speak_text(response)
        else:
            st.error(response)