Spaces:
Sleeping
Sleeping
sickcell69
committed on
Commit
•
2678b8b
1
Parent(s):
e037f51
Update app.py
Browse files
app.py
CHANGED
@@ -23,15 +23,22 @@ embeddings_path = 'corpus_embeddings.pt'
|
|
23 |
corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
|
24 |
|
25 |
def semantic_search(query):
|
|
|
|
|
26 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
27 |
-
search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=
|
28 |
|
29 |
results = []
|
30 |
for hit in search_hits[0]:
|
31 |
-
|
32 |
-
|
|
|
|
|
|
|
|
|
|
|
33 |
|
34 |
-
return
|
35 |
|
36 |
iface = gr.Interface(
|
37 |
fn=semantic_search,
|
|
|
23 |
corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
|
24 |
|
25 |
def semantic_search(query):
|
26 |
+
print("Data columns:", data.columns)
|
27 |
+
print("First few rows:", data.head())
|
28 |
query_embedding = model.encode(query, convert_to_tensor=True)
|
29 |
+
search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)
|
30 |
|
31 |
results = []
|
32 |
for hit in search_hits[0]:
|
33 |
+
# 使用更安全的方法來訪問數據
|
34 |
+
row = data.iloc[hit['corpus_id']]
|
35 |
+
if 'tokens' in row:
|
36 |
+
text = " ".join(row['tokens'])
|
37 |
+
else:
|
38 |
+
text = str(row) # 如果沒有 'tokens',就轉換整行為字符串
|
39 |
+
results.append((hit['score'], text))
|
40 |
|
41 |
+
return results
|
42 |
|
43 |
iface = gr.Interface(
|
44 |
fn=semantic_search,
|