File size: 1,254 Bytes
f99b8f7
 
 
 
 
 
 
 
 
 
 
 
c64b53c
 
f99b8f7
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
#!/usr/bin/env python
# coding: utf-8

# In[7]:


import gradio as gr
import pandas as pd
from sentence_transformers import SentenceTransformer, util
import torch

# Load the sentence-embedding model used to encode incoming queries.
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
# Alternative checkpoint kept for reference: "sickcell69/bert-finetuned-ner"
model = SentenceTransformer(model_checkpoint)

# Load the labelled records. semantic_search() reads each row's 'tokens'
# column, looked up positionally by the corpus_id of a search hit.
data_path = 'labeled_cti_data.json'
data = pd.read_json(data_path)

# Load the precomputed corpus embeddings.
# map_location remaps the stored tensors onto the device the model runs on,
# so a file saved from a GPU session still loads on a CPU-only host instead
# of failing while deserializing CUDA storage.
# NOTE(review): torch.load unpickles the file; only load embeddings that come
# from a trusted source.
embeddings_path = 'corpus_embeddings.pt'
corpus_embeddings = torch.load(embeddings_path, map_location=model.device)

def semantic_search(query):
    """Return the top five corpus entries most similar to ``query``.

    The query is embedded with the module-level ``model`` and matched against
    ``corpus_embeddings`` through ``util.semantic_search``. Each hit's
    ``corpus_id`` is used as a positional row index into ``data``, and that
    row's ``tokens`` are joined with single spaces to rebuild the text.

    Args:
        query: Search text, e.g. the value typed into the Gradio textbox.

    Returns:
        A string with one line per hit, formatted as
        ``"Score: <score> - Text: <text>"`` (score shown with four decimals),
        lines separated by newlines. An empty string is returned when
        ``query`` is ``None``, empty, or only whitespace.
    """
    # A blank query carries nothing to search for; skip the encoder entirely
    # rather than returning hits for an empty string.
    if query is None or (isinstance(query, str) and not query.strip()):
        return ""

    query_embedding = model.encode(query, convert_to_tensor=True)
    # The result holds one hit list per query; a single query was passed, so
    # only the first list is relevant.
    hits = util.semantic_search(query_embedding, corpus_embeddings, top_k=5)[0]

    return "\n".join(
        f"Score: {hit['score']:.4f} - "
        f"Text: {' '.join(data.iloc[hit['corpus_id']]['tokens'])}"
        for hit in hits
    )

# Gradio UI: a single text input routed through semantic_search, with the
# formatted hit list shown as plain text.
iface = gr.Interface(
    semantic_search,
    inputs="text",
    outputs="text",
    description="輸入一個查詢,然後模型將返回最相似的結果。",
    title="語義搜索應用",
)

if __name__ == "__main__":
    # Earlier variant: iface.launch() without a public share link.
    # NOTE (original author): the web page does not come up with this call.
    iface.launch(share=True)


# In[ ]: