import sys
import time

import faiss
import numpy as np
import pandas as pd
from sentence_transformers import SentenceTransformer

# Reference point for the "Time passed" figure printed by every step below.
start_time = time.time()

# Location of the serialized FAISS index, relative to the working directory.
index_path = "embeddings/multilingual-e5-small_vector_db.index"

# Load the vector index. The similarity search further down needs `index`,
# so a failure here is fatal: report it and stop, rather than carrying on
# with the name undefined and failing later with an unrelated NameError.
try:
    index = faiss.read_index(index_path)
except Exception as e:
    print(f"Error loading FAISS index: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    sys.exit(1)
else:
    print(f"FAISS index loaded successfully from {index_path} - Time passed: {time.time() - start_time:.2f} seconds")

# Load the sentence-embedding model. `local_files_only=True` is passed so the
# model is resolved from the local cache instead of being downloaded.
# The encode step needs `model`, so a load failure stops the script here.
try:
    model = SentenceTransformer('intfloat/multilingual-e5-small', local_files_only=True)
except Exception as e:
    print(f"Error loading model: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    sys.exit(1)
else:
    print(f"Model loaded successfully - Time passed: {time.time() - start_time:.2f} seconds")

# Query to classify. Kept as a one-element list so encode() receives a batch
# and the results can be addressed per query.
new_text = ["Cat am de plata"]
print(f'The text is: {new_text} - Time passed: {time.time() - start_time:.2f} seconds')

# Embed the query text.
# NOTE(review): the E5 model card recommends prefixing inputs with "query: " /
# "passage: ". The text is encoded unprefixed here; confirm this matches how
# the vectors stored in the index were produced before changing it.
# Without embeddings there is nothing to search with, so failure is fatal.
try:
    new_embeddings = model.encode(new_text)
except Exception as e:
    print(f"Error generating embeddings: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    sys.exit(1)
else:
    print(f"Generated embeddings for new text: - Time passed: {time.time() - start_time:.2f} seconds")

# Normalise the embeddings to a contiguous float32 NumPy array, the layout
# FAISS expects for query vectors. Done in a single conversion call; a failure
# stops the script because the search cannot run on an unconverted value.
try:
    new_embeddings = np.ascontiguousarray(new_embeddings, dtype=np.float32)
except Exception as e:
    print(f"Error converting embeddings to float32: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    sys.exit(1)
else:
    print(f"Converted new embeddings to float32: - Time passed: {time.time() - start_time:.2f} seconds")

# Number of nearest neighbours to retrieve per query.
k = 5

# Run the nearest-neighbour search. `D` holds the distances and `I` the
# matching index positions, one row per query. Both are used unconditionally
# when the intent is resolved, so a failed search ends the script here.
try:
    D, I = index.search(new_embeddings, k)
except Exception as e:
    print(f"Error performing similarity search: {e} - Time passed: {time.time() - start_time:.2f} seconds")
    sys.exit(1)
else:
    print(f"Similarity search results: Indices - {I}, Distances - {D} - Time passed: {time.time() - start_time:.2f} seconds")

# CSV holding the intent labels looked up from the search results.
# NOTE(review): this is a machine-specific absolute path; consider a
# repository-relative path or a command-line argument.
csv_file_path = r'C:\Users\serban.tica\Documents\tobi_llm_intent_recognition\data\Pager_Intents_Cleaned.csv'

# `data` is required to map a neighbour index to an intent, so a load failure
# is reported and the script stops instead of continuing without it.
try:
    data = pd.read_csv(csv_file_path)
except Exception as e:
    print(f"Error loading CSV file: {e}")
    sys.exit(1)
else:
    print(f"CSV file loaded successfully from {csv_file_path}")

# Disabled debugging aid: print the DataFrame row behind every neighbour
# returned for each query. Kept for reference; uncomment to re-enable.
# (The DataFrame is named `data` in this script, so the lookup uses `data`.)
#
# try:
#     for i, query in enumerate(new_text):
#         print(f"Query: {query} - Time passed: {time.time() - start_time:.2f} seconds")
#         for idx in I[i]:
#             print(f"Index: {idx}, Row: {data.iloc[idx]} - Time passed: {time.time() - start_time:.2f} seconds")
# except Exception as e:
#     print(f"Error retrieving rows from DataFrame: {e} - Time passed: {time.time() - start_time:.2f} seconds")

# Intent labels taken from the 'intent' column.
# NOTE(review): assumes CSV row order matches the order in which vectors were
# added to the FAISS index - confirm against the index-building script.
intents = data['intent'].tolist()

# Position of the closest neighbour for the first (and only) query.
best_idx = int(I[0][0])

# FAISS fills missing neighbours with -1 when it cannot return k results, and
# a negative index would silently wrap around to the last CSV row. An index
# beyond the CSV length means the CSV and the index are out of sync. Both
# cases are reported explicitly instead of yielding a wrong label or a bare
# IndexError.
if not 0 <= best_idx < len(intents):
    print(
        f"Error resolving intent: neighbour index {best_idx} is outside the "
        f"{len(intents)} rows loaded from the CSV - "
        f"Time passed: {time.time() - start_time:.2f} seconds"
    )
    sys.exit(1)

intent = intents[best_idx]
distance = D[0][0]

# Map the distance onto (0, 1]: 0 distance gives 1.0, larger distances decay
# towards 0.
# NOTE(review): this presumes a non-negative L2-style distance; if the index
# uses inner product, `distance` is already a similarity - confirm the metric.
similarity = 1 / (1 + distance)

print(f"Intenția identificată: {intent}")
print(f"Nivel de încredere: {similarity:.4f}- Time passed: {time.time() - start_time:.2f} seconds")