sickcell69 committed on
Commit
2678b8b
1 Parent(s): e037f51

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +11 -4
app.py CHANGED
@@ -23,15 +23,22 @@ embeddings_path = 'corpus_embeddings.pt'
23
  corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
24
 
25
  def semantic_search(query):
 
 
26
  query_embedding = model.encode(query, convert_to_tensor=True)
27
- search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)
28
 
29
  results = []
30
  for hit in search_hits[0]:
31
- text = " ".join(data.iloc[hit['corpus_id']]['tokens'])
32
- results.append(f"Score: {hit['score']:.4f} - Text: {text}")
 
 
 
 
 
33
 
34
- return "\n".join(results)
35
 
36
  iface = gr.Interface(
37
  fn=semantic_search,
 
23
  corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
24
 
25
  def semantic_search(query):
26
+ print("Data columns:", data.columns)
27
+ print("First few rows:", data.head())
28
  query_embedding = model.encode(query, convert_to_tensor=True)
29
+ search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)
30
 
31
  results = []
32
  for hit in search_hits[0]:
33
+ # 使用更安全的方法來訪問數據
34
+ row = data.iloc[hit['corpus_id']]
35
+ if 'tokens' in row:
36
+ text = " ".join(row['tokens'])
37
+ else:
38
+ text = str(row) # 如果沒有 'tokens',就轉換整行為字符串
39
+ results.append((hit['score'], text))
40
 
41
+ return results
42
 
43
  iface = gr.Interface(
44
  fn=semantic_search,