Spaces:
Sleeping
Sleeping
#!/usr/bin/env python | |
# coding: utf-8 | |
# In[7]: | |
import gradio as gr | |
import pandas as pd | |
from sentence_transformers import SentenceTransformer, util | |
import torch | |
# Load the semantic-search model.
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
model = SentenceTransformer(model_checkpoint)

# Load the labeled data set from its JSON file into a DataFrame.
data_path = 'labeled_cti_data.json'
data = pd.read_json(data_path)

# Load the precomputed corpus embeddings.
# map_location remaps the stored tensors onto the device the model lives on,
# so embeddings that were saved from a GPU still load on a CPU-only host
# instead of failing during deserialization.
# NOTE(review): torch.load unpickles the file; only load embeddings that come
# from a trusted source.
embeddings_path = 'corpus_embeddings.pt'
corpus_embeddings = torch.load(embeddings_path, map_location=model.device)
def semantic_search(query):
    """Return the top five corpus matches for *query* as newline-separated text.

    The query is encoded with the module-level ``model`` and compared against
    ``corpus_embeddings`` through ``util.semantic_search``. Each hit is
    rendered as ``Score: <score to 4 decimals> - Text: <tokens>``, where the
    tokens come from the ``tokens`` field of the matching row in ``data`` and
    are joined with single spaces.
    """
    encoded_query = model.encode(query, convert_to_tensor=True)
    # Only one query is submitted, so only the first result list is used.
    top_hits = util.semantic_search(encoded_query, corpus_embeddings, top_k=5)[0]

    def render(hit):
        # Look up the source row by the corpus index reported for this hit.
        row = data.iloc[hit['corpus_id']]
        joined_tokens = " ".join(row['tokens'])
        return f"Score: {hit['score']:.4f} - Text: {joined_tokens}"

    return "\n".join([render(hit) for hit in top_hits])
# Gradio front end: one free-text input, one plain-text output, both wired to
# semantic_search. The title and description are shown to the user as-is.
iface = gr.Interface(
    title="語義搜索應用",
    description="輸入一個查詢,然後模型將返回最相似的結果。",
    fn=semantic_search,
    inputs="text",
    outputs="text",
)
if __name__ == "__main__":
    # Start the Gradio app and request a public share link.
    # NOTE(review): the original author remarked that the web page does not
    # come up with this call. If the app is hosted on a platform that already
    # serves it publicly, a plain iface.launch() is presumably enough; confirm
    # before changing.
    iface.launch(share=True)
# In[ ]: | |