Update app.py
Browse files
app.py
CHANGED
@@ -14,8 +14,35 @@ df['embeding_context'] = df['embeding_context'].astype(str).fillna('')
|
|
14 |
|
15 |
# Drop rows whose 'embeding_context' is an empty string
|
16 |
df = df[df['embeding_context'] != '']
|
17 |
-
|
18 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
|
21 |
# Function to perform search and return all columns
|
@@ -23,7 +50,7 @@ def search_query(query_text):
|
|
23 |
num_records = 50
|
24 |
|
25 |
# Encode the input query text
|
26 |
-
embeddings_query = model.encode([query_text], batch_size=12, max_length=
|
27 |
embeddings_query_np = np.array(embeddings_query).astype('float32')
|
28 |
|
29 |
# Search in FAISS index for nearest neighbors
|
@@ -41,16 +68,16 @@ def gradio_interface(query_text):
|
|
41 |
|
42 |
with gr.Blocks() as app:
|
43 |
gr.Markdown("<h1>White Stride Red Search (BEG-M3)</h1>")
|
44 |
-
|
45 |
# Input text box for the search query
|
46 |
search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)
|
47 |
-
|
48 |
# Search button below the text box
|
49 |
search_button = gr.Button("Search")
|
50 |
-
|
51 |
# Output table for displaying results
|
52 |
search_output = gr.DataFrame(label="Search Results")
|
53 |
-
|
54 |
# Link button click to action
|
55 |
search_button.click(fn=gradio_interface, inputs=search_input, outputs=search_output)
|
56 |
|
|
|
14 |
|
15 |
# Drop rows whose 'embeding_context' is an empty string
|
16 |
df = df[df['embeding_context'] != '']
|
17 |
+
|
18 |
+
# Encode the 'embeding_context' column
|
19 |
+
embedding_contexts = df['embeding_context'].tolist()
|
20 |
+
embeddings_csv = model.encode(embedding_contexts, batch_size=12, max_length=1024)['dense_vecs']
|
21 |
+
|
22 |
+
# Convert embeddings to numpy array
|
23 |
+
embeddings_np = np.array(embeddings_csv).astype('float32')
|
24 |
+
|
25 |
+
# FAISS index file path
|
26 |
+
index_file_path = 'vector_store_bge_m3.index'
|
27 |
+
|
28 |
+
# Check if FAISS index file already exists
|
29 |
+
if os.path.exists(index_file_path):
|
30 |
+
# Load the existing FAISS index from file
|
31 |
+
index = faiss.read_index(index_file_path)
|
32 |
+
print("FAISS index loaded from file.")
|
33 |
+
else:
|
34 |
+
# Initialize a flat (exact-search) FAISS index that ranks by L2 (Euclidean) distance
|
35 |
+
dim = embeddings_np.shape[1]
|
36 |
+
index = faiss.IndexFlatL2(dim)
|
37 |
+
|
38 |
+
# Add embeddings to the FAISS index
|
39 |
+
index.add(embeddings_np)
|
40 |
+
|
41 |
+
# Save the FAISS index to a file for future use
|
42 |
+
faiss.write_index(index, index_file_path)
|
43 |
+
print("FAISS index created and saved to file.")
|
44 |
+
|
45 |
+
index = faiss.read_index(index_file_path)
|
46 |
|
47 |
|
48 |
# Function to perform search and return all columns
|
|
|
50 |
num_records = 50
|
51 |
|
52 |
# Encode the input query text
|
53 |
+
embeddings_query = model.encode([query_text], batch_size=12, max_length=1024)['dense_vecs']
|
54 |
embeddings_query_np = np.array(embeddings_query).astype('float32')
|
55 |
|
56 |
# Search in FAISS index for nearest neighbors
|
|
|
68 |
|
69 |
with gr.Blocks() as app:
|
70 |
gr.Markdown("<h1>White Stride Red Search (BEG-M3)</h1>")
|
71 |
+
|
72 |
# Input text box for the search query
|
73 |
search_input = gr.Textbox(label="Search Query", placeholder="Enter search text", interactive=True)
|
74 |
+
|
75 |
# Search button below the text box
|
76 |
search_button = gr.Button("Search")
|
77 |
+
|
78 |
# Output table for displaying results
|
79 |
search_output = gr.DataFrame(label="Search Results")
|
80 |
+
|
81 |
# Link button click to action
|
82 |
search_button.click(fn=gradio_interface, inputs=search_input, outputs=search_output)
|
83 |
|