"""Gradio demo that runs semantic search over a pre-embedded CTI corpus.

At import time the script loads a SentenceTransformer checkpoint, a JSON
data file and a tensor of pre-computed corpus embeddings, then exposes a
single text-in / text-out Gradio interface backed by ``semantic_search``.
"""
import gradio as gr
import pandas as pd
import torch
from sentence_transformers import SentenceTransformer, util

# Load the semantic-search model.
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
model = SentenceTransformer(model_checkpoint)

# Load the corpus data. Each row is expected to carry a 'tokens' entry
# (an iterable of strings) that is joined back into display text below.
data_path = 'labeled_cti_data.json'
data = pd.read_json(data_path)

# Load the pre-computed corpus embeddings.
# map_location="cpu" lets a file that was saved from a CUDA device load on a
# CPU-only host; util.semantic_search moves the query embedding onto the
# corpus device before scoring, so results are unaffected.
# NOTE(review): torch.load unpickles the file -- only load trusted files.
embeddings_path = 'corpus_embeddings.pt'
corpus_embeddings = torch.load(embeddings_path, map_location="cpu")


def semantic_search(query, top_k=5):
    """Return the corpus entries most similar to *query* as display text.

    Args:
        query: Free-text search string typed by the user.
        top_k: Maximum number of hits to return (defaults to 5).

    Returns:
        One line per hit, formatted as ``Score: <score> - Text: <text>`` and
        joined with newlines. A blank query returns an empty string.
    """
    # A blank query has no meaningful embedding; skip the model call.
    if not query or not query.strip():
        return ""

    query_embedding = model.encode(query, convert_to_tensor=True)
    search_hits = util.semantic_search(
        query_embedding, corpus_embeddings, top_k=top_k
    )

    # search_hits holds one hit list per query; only one query is sent.
    # Assumes row i of `data` corresponds to embedding i -- TODO confirm.
    results = [
        f"Score: {hit['score']:.4f} - Text: "
        f"{' '.join(data.iloc[hit['corpus_id']]['tokens'])}"
        for hit in search_hits[0]
    ]
    return "\n".join(results)


iface = gr.Interface(
    fn=semantic_search,
    inputs="text",
    outputs="text",
    title="語義搜索應用",
    description="輸入一個查詢,然後模型將返回最相似的結果。",
)

if __name__ == "__main__":
    # Alternative: iface.launch() serves on localhost only.
    # NOTE: the original author reported that the web page does not come up
    # with this configuration; share=True additionally requests a public
    # share link.
    iface.launch(share=True)