sickcell69 committed on
Commit
ac171fd
1 Parent(s): 2678b8b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +21 -39
app.py CHANGED
@@ -1,17 +1,11 @@
1
- #!/usr/bin/env python
2
- # coding: utf-8
3
-
4
- # In[7]:
5
-
6
-
7
- import gradio as gr
8
  import pandas as pd
9
  from sentence_transformers import SentenceTransformer, util
10
  import torch
11
 
 
12
  # 載入語義搜索模型
13
  model_checkpoint = "sickcell69/cti-semantic-search-minilm"
14
- #model_checkpoint = "sickcell69/bert-finetuned-ner"
15
  model = SentenceTransformer(model_checkpoint)
16
 
17
  # 載入數據
@@ -20,41 +14,29 @@ data = pd.read_json(data_path)
20
 
21
  # 載入嵌入文件
22
  embeddings_path = 'corpus_embeddings.pt'
23
- corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
24
 
25
- def semantic_search(query):
26
- print("Data columns:", data.columns)
27
- print("First few rows:", data.head())
 
 
 
 
 
 
28
  query_embedding = model.encode(query, convert_to_tensor=True)
29
- search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=3)
30
-
31
  results = []
32
  for hit in search_hits[0]:
33
- # 使用更安全的方法來訪問數據
34
- row = data.iloc[hit['corpus_id']]
35
- if 'tokens' in row:
36
- text = " ".join(row['tokens'])
37
- else:
38
- text = str(row) # 如果沒有 'tokens',就轉換整行為字符串
39
- results.append((hit['score'], text))
40
 
41
- return results
42
-
43
- iface = gr.Interface(
44
- fn=semantic_search,
45
- inputs="text",
46
- outputs="text",
47
- title="語義搜索應用",
48
- description="輸入一個查詢,然後模型將返回最相似的結果。"
49
- )
50
 
51
  if __name__ == "__main__":
52
- #iface.launch()
53
- iface.launch(share=True) #網頁跑不出來
54
-
55
-
56
- # In[ ]:
57
-
58
-
59
-
60
-
 
1
+ from flask import Flask, request, jsonify, render_template
 
 
 
 
 
 
2
  import pandas as pd
3
  from sentence_transformers import SentenceTransformer, util
4
  import torch
5
 
6
+
7
  # 載入語義搜索模型
8
  model_checkpoint = "sickcell69/cti-semantic-search-minilm"
 
9
  model = SentenceTransformer(model_checkpoint)
10
 
11
  # 載入數據
 
14
 
15
  # 載入嵌入文件
16
  embeddings_path = 'corpus_embeddings.pt'
17
# Force tensors onto the CPU while loading: without map_location, embeddings
# that were saved from a CUDA device fail to deserialize on a CPU-only host.
corpus_embeddings = torch.load(embeddings_path, map_location=torch.device('cpu'))
18
 
19
+ app = Flask(__name__)
20
+
21
@app.route('/')
def home():
    """Serve the landing page by rendering the ``index.html`` template."""
    page = render_template('index.html')
    return page
24
+
25
@app.route('/search', methods=['GET'])
def search():
    """Run a semantic search for the ``query`` URL parameter.

    Returns:
        A JSON list of ``{"text": ..., "score": ...}`` objects, one per hit
        returned by ``util.semantic_search`` (called with ``top_k=5``).
        When ``query`` is missing or blank, a JSON error object with
        HTTP status 400 is returned instead.
    """
    query = request.args.get('query')
    if query is None or not query.strip():
        # Reject early instead of handing None/blank text to the encoder,
        # which would otherwise surface as an unhandled server error.
        return jsonify({"error": "Missing required 'query' parameter."}), 400

    query_embedding = model.encode(query, convert_to_tensor=True)
    search_hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)

    # Only one query was encoded, so its hits are the first (only) entry.
    results = [
        {
            # Each corpus row stores its text as a token list under 'tokens'.
            "text": " ".join(data.iloc[hit['corpus_id']]['tokens']),
            "score": hit['score'],
        }
        for hit in search_hits[0]
    ]

    return jsonify(results)
 
 
 
 
 
 
 
 
40
 
41
if __name__ == "__main__":
    import os

    # host='0.0.0.0' listens on every network interface. The interactive
    # debugger enabled by debug=True lets anyone who can reach the server
    # execute arbitrary code, so it is off unless FLASK_DEBUG=1 is set
    # explicitly (e.g. for local development).
    app.run(debug=os.environ.get("FLASK_DEBUG") == "1", host='0.0.0.0')