"""Flask front end for semantic search over a labeled CTI corpus.

At import time the module loads a SentenceTransformer model, the corpus
records from a JSON file, and the precomputed corpus embeddings.  It then
exposes two routes: ``/`` renders the search page and ``/search`` returns
the best-matching corpus entries for a query as JSON.
"""

import os

import pandas as pd
import torch
from flask import Flask, jsonify, render_template, request
from sentence_transformers import SentenceTransformer, util

# Number of best-matching corpus entries returned for each query.
TOP_K = 5

# Load the semantic search model.
model_checkpoint = "sickcell69/cti-semantic-search-minilm"
model = SentenceTransformer(model_checkpoint)

# Load the corpus data.
data_path = 'labeled_cti_data.json'
data = pd.read_json(data_path)

# Load the precomputed corpus embeddings.
# NOTE(review): torch.load unpickles the file, so it must only ever be
# pointed at a trusted embeddings file.
embeddings_path = 'corpus_embeddings.pt'
corpus_embeddings = torch.load(embeddings_path)

app = Flask(__name__)


@app.route('/')
def home():
    """Render the search page from the ``index.html`` template."""
    return render_template('index.html')


@app.route('/search', methods=['GET'])
def search():
    """Return the ``TOP_K`` corpus entries most similar to the query.

    Reads the ``query`` URL parameter, embeds it with the loaded model and
    ranks it against ``corpus_embeddings``.

    Returns:
        A JSON list of ``{"text": ..., "score": ...}`` objects, where
        ``text`` is the space-joined ``tokens`` entry of the matched corpus
        row and ``score`` is the similarity score reported by
        ``util.semantic_search``.  When ``query`` is missing or blank, a
        JSON ``{"error": ...}`` object with HTTP status 400 is returned
        instead.
    """
    query = request.args.get('query')
    # Reject missing/blank input up front instead of handing None to the
    # encoder, which would surface as an opaque server error.
    if query is None or not query.strip():
        return jsonify({"error": "Missing required 'query' parameter."}), 400

    query_embedding = model.encode(query, convert_to_tensor=True)
    search_hits = util.semantic_search(
        query_embedding, corpus_embeddings, top_k=TOP_K
    )

    # Select the column once; each hit then needs only a positional lookup
    # rather than materializing a whole row.
    tokens = data['tokens']
    # A single query was submitted, so only the first hit list is relevant.
    results = [
        {
            "text": " ".join(tokens.iloc[hit['corpus_id']]),
            "score": hit['score'],
        }
        for hit in search_hits[0]
    ]
    return jsonify(results)


if __name__ == "__main__":
    # The server listens on every interface, so the interactive debugger
    # (which lets a client run arbitrary code) must be opt-in: enable it
    # only by setting FLASK_DEBUG to 1/true/yes/on.
    debug_enabled = os.environ.get("FLASK_DEBUG", "").strip().lower() in {
        "1",
        "true",
        "yes",
        "on",
    }
    app.run(debug=debug_enabled, host='0.0.0.0')