Danil committed on
Commit
f27e27b
·
1 Parent(s): 35a4e1d

Delete indexer.py

Browse files
Files changed (1) hide show
  1. indexer.py +0 -64
indexer.py DELETED
@@ -1,64 +0,0 @@
1
- import pickle
2
- import faiss
3
- import numpy as np
4
- # from grammar import remove_verbs, clean_text
5
- from utils import *
6
- from sentence_transformers import SentenceTransformer
7
-
8
-
9
class FAISS:
    """Sentence-embedding search index with per-vector metadata.

    ``self.index`` holds the vectors, while ``self.vectors`` maps the
    sequential position of every added vector to the tuple
    ``(idx, text, pop, text_vec)``. ``self.counter`` is the position the
    next added vector will receive, so it must stay in step with both.
    """

    def __init__(self, dimensions: int,
                 model_name: str = 'paraphrase-multilingual-MiniLM-L12-v2'):
        """Create an empty index and load the sentence encoder.

        Args:
            dimensions: Vector size handed to ``faiss.IndexFlatL2``.
            model_name: Name passed to ``SentenceTransformer``; defaults to
                the model this class originally hard-coded.
        """
        self.dimensions = dimensions
        self.index = faiss.IndexFlatL2(dimensions)
        self.vectors = {}
        self.counter = 0
        self.model_name = model_name
        self.sentence_encoder = SentenceTransformer(self.model_name)

    def init_vectors(self, path):
        """Load the metadata mapping from the pickle file at ``path``.

        The counter is advanced past the loaded entries so that a later
        ``add`` appends instead of overwriting entry 0.
        """
        # NOTE(security): pickle.load can execute arbitrary code; only use
        # this with files produced by a trusted source.
        with open(path, 'rb') as pkl_file:
            self.vectors = pickle.load(pkl_file)
        # Entries are keyed 0..n-1 by ``add``, so the next free key is n.
        self.counter = len(self.vectors)

    def init_index(self, path):
        """Replace the index with the one stored at ``path``.

        The counter is set to the number of vectors already in the loaded
        index, because that is the position the next added vector gets.
        """
        self.index = faiss.read_index(path)
        self.counter = self.index.ntotal

    def add(self, text, idx, pop, emb=None):
        """Add one text to the index and record its metadata.

        Args:
            text: Text to store; encoded with the sentence encoder when
                ``emb`` is not given.
            idx: Caller-supplied identifier stored with the text.
            pop: Caller-supplied value stored with the text
                (presumably a popularity score; confirm with callers).
            emb: Optional precomputed embedding, passed to the index as is.
                Assumed to hold exactly one row, since only one metadata
                entry is recorded per call.
        """
        if emb is None:
            text_vec = self.sentence_encoder.encode([text])
        else:
            text_vec = emb
        self.index.add(text_vec)
        self.vectors[self.counter] = (idx, text, pop, text_vec)
        self.counter += 1

    def search(self, v: list, k: int = 10):
        """Return metadata for up to ``k`` nearest neighbours of ``v``.

        Args:
            v: Query passed straight to ``self.index.search``; only the
                results for its first row are used.
            k: Maximum number of neighbours requested.

        Returns:
            A list of ``(idx, text, pop, distance)`` tuples in the order
            the index returned them.
        """
        distances, positions = self.index.search(v, k)
        hits = []
        for dist, pos in zip(distances[0], positions[0]):
            # -1 marks an unfilled result slot; nothing valid follows it.
            if pos == -1:
                break
            idx, text, pop = self.vectors[pos][:3]
            hits.append((idx, text, pop, dist))
        return hits

    def suggest_tags(self, query, top_n=10, k=30) -> list:
        """Suggest up to ``top_n`` stored texts for ``query``.

        The lower-cased query is encoded, the ``k`` nearest entries are
        fetched, filtered with ``check`` and ranked. A candidate is kept
        only when ``sweet_check`` holds against every lower-ranked one.

        Returns:
            The texts of the surviving candidates, best first.
        """
        emb = self.sentence_encoder.encode([query.lower()])
        candidates = [hit for hit in self.search(emb, k) if check(query, hit[1])]

        # TODO: add a weight relative to length.
        # NOTE(review): the key uses slot 0 (idx) of each hit, while ``pop``
        # sits in slot 2; confirm which one was meant to drive the ranking.
        ranked = sorted(candidates, key=lambda x: x[0] * 0.3 - x[-1], reverse=True)

        total_result = []
        for pos, hit in enumerate(ranked):
            keep = True
            # Every pair is evaluated on purpose (no short-circuit), exactly
            # as the accumulation with ``&=`` requires.
            for later in ranked[pos + 1:]:
                keep &= sweet_check(hit[1], later[1])
            if keep:
                total_result.append(hit[1])

        return total_result[:top_n]