Spaces:

tarrasyed19472007
/

movie_practice

Runtime error

App Files Files Community

tarrasyed19472007 committed 11 days ago

Commit

a713626

•

1 Parent(s): 7ef5343

Create app.py

Browse files

Files changed (1) hide show

app.py +67 -0

app.py ADDED Viewed

	@@ -0,0 +1,67 @@

+import pandas as pd
+from sentence_transformers import SentenceTransformer
+import faiss
+import numpy as np
+import streamlit as st
+# Load CSV data using a relative path
+csv_file = "Hydra-Movie-Scrape.csv"  # Ensure this file is in the same directory as app.py
+df = pd.read_csv(csv_file)
+# Use 'Summary' or 'Short Summary' as the source for documents
+# Fill NaNs with "No summary available."
+df['Summary'] = df['Summary'].fillna("No summary available.")
+documents = df['Summary'].tolist()  # Use 'Summary' for document embeddings
+# Initialize the SentenceTransformer model
+model = SentenceTransformer('all-MiniLM-L6-v2')
+# Create and cache embeddings
+@st.cache_resource
+def create_embeddings(documents):
+    embeddings = model.encode(documents, show_progress_bar=True)
+    return embeddings
+# Generate and cache embeddings
+doc_embeddings = create_embeddings(documents)
+# Convert to NumPy array (FAISS requires float32)
+embedding_matrix = np.array(doc_embeddings).astype("float32")
+# Build FAISS index for efficient similarity search
+index = faiss.IndexFlatL2(embedding_matrix.shape[1])
+index.add(embedding_matrix)
+# Function to retrieve the most relevant documents
+def retrieve(query, top_k=10):  # Retrieve up to 10 movies
+    query_embedding = model.encode(query)  # Encode the query
+    query_vector = np.array(query_embedding).astype("float32")
+    distances, indices = index.search(np.array([query_vector]), top_k)
+    return indices[0]
+# Streamlit application layout
+st.title("Movie Dataset RAG Application")
+query = st.text_input("Ask a question about movies:")
+if st.button("Submit"):
+    if query:
+        indices = retrieve(query)
+        # Prepare and display detailed responses
+        response = ""
+        for idx in indices:
+            if idx != -1:  # Check if the index is valid
+                movie_details = df.iloc[idx]
+                response += f"**Title**: {movie_details['Title']}\n"
+                response += f"**Year**: {movie_details['Year']}\n"
+                response += f"**Director**: {movie_details['Director']}\n"
+                response += f"**Cast**: {movie_details['Cast']}\n"
+                response += f"**Summary**: {movie_details['Summary']}\n\n"
+        # Formatting the output with clearer separation
+        if response:
+            st.write("Here are some movies that match your query:\n")
+            st.markdown(response)  # Use markdown to format the output nicely
+        else:
+            st.write("No relevant documents found.")
+    else:
+        st.write("Please enter a query.")