Spaces:
Build error
Build error
hellopahe
committed on
Commit
·
94692cf
1
Parent(s):
d777f98
remove redundancy
Browse files- lex_rank.py +5 -7
- lex_rank_L12.py +4 -7
- lex_rank_text2vec_v1.py +4 -7
- lex_rank_util.py +14 -6
- test.py +8 -0
lex_rank.py
CHANGED
@@ -5,7 +5,7 @@ nltk.download('punkt')
|
|
5 |
|
6 |
|
7 |
from harvesttext import HarvestText
|
8 |
-
from lex_rank_util import degree_centrality_scores,
|
9 |
from sentence_transformers import SentenceTransformer, util
|
10 |
|
11 |
|
@@ -30,15 +30,13 @@ class LexRank(object):
|
|
30 |
# We argsort so that the first element is the sentence with the highest score
|
31 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
32 |
|
33 |
-
|
34 |
res = []
|
35 |
-
for index in
|
36 |
-
|
37 |
-
break
|
38 |
-
res.append(find_siblings(sentences, index, siblings)[1])
|
39 |
-
num -= 1
|
40 |
return res
|
41 |
|
|
|
42 |
def contains_chinese(self, content: str):
|
43 |
for _char in content:
|
44 |
if '\u4e00' <= _char <= '\u9fa5':
|
|
|
5 |
|
6 |
|
7 |
from harvesttext import HarvestText
|
8 |
+
from lex_rank_util import degree_centrality_scores, find_siblings_by_index
|
9 |
from sentence_transformers import SentenceTransformer, util
|
10 |
|
11 |
|
|
|
30 |
# We argsort so that the first element is the sentence with the highest score
|
31 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
32 |
|
33 |
+
central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
|
34 |
res = []
|
35 |
+
for index in central_and_siblings:
|
36 |
+
res.append(sentences[index])
|
|
|
|
|
|
|
37 |
return res
|
38 |
|
39 |
+
|
40 |
def contains_chinese(self, content: str):
|
41 |
for _char in content:
|
42 |
if '\u4e00' <= _char <= '\u9fa5':
|
lex_rank_L12.py
CHANGED
@@ -3,7 +3,7 @@ nltk.download('punkt')
|
|
3 |
|
4 |
|
5 |
from harvesttext import HarvestText
|
6 |
-
from lex_rank_util import degree_centrality_scores,
|
7 |
from sentence_transformers import SentenceTransformer, util
|
8 |
|
9 |
|
@@ -28,13 +28,10 @@ class LexRankL12(object):
|
|
28 |
# We argsort so that the first element is the sentence with the highest score
|
29 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
30 |
|
31 |
-
|
32 |
res = []
|
33 |
-
for index in
|
34 |
-
|
35 |
-
break
|
36 |
-
res.append(find_siblings(sentences, index, siblings)[1])
|
37 |
-
num -= 1
|
38 |
return res
|
39 |
|
40 |
def contains_chinese(self, content: str):
|
|
|
3 |
|
4 |
|
5 |
from harvesttext import HarvestText
|
6 |
+
from lex_rank_util import degree_centrality_scores, find_siblings_by_index
|
7 |
from sentence_transformers import SentenceTransformer, util
|
8 |
|
9 |
|
|
|
28 |
# We argsort so that the first element is the sentence with the highest score
|
29 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
30 |
|
31 |
+
central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
|
32 |
res = []
|
33 |
+
for index in central_and_siblings:
|
34 |
+
res.append(sentences[index])
|
|
|
|
|
|
|
35 |
return res
|
36 |
|
37 |
def contains_chinese(self, content: str):
|
lex_rank_text2vec_v1.py
CHANGED
@@ -3,7 +3,7 @@ nltk.download('punkt')
|
|
3 |
|
4 |
|
5 |
from harvesttext import HarvestText
|
6 |
-
from lex_rank_util import degree_centrality_scores,
|
7 |
from sentence_transformers import SentenceTransformer, util
|
8 |
|
9 |
|
@@ -28,13 +28,10 @@ class LexRankText2VecV1(object):
|
|
28 |
# We argsort so that the first element is the sentence with the highest score
|
29 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
30 |
|
31 |
-
|
32 |
res = []
|
33 |
-
for index in
|
34 |
-
|
35 |
-
break
|
36 |
-
res.append(find_siblings(sentences, index, siblings)[1])
|
37 |
-
num -= 1
|
38 |
return res
|
39 |
|
40 |
def contains_chinese(self, content: str):
|
|
|
3 |
|
4 |
|
5 |
from harvesttext import HarvestText
|
6 |
+
from lex_rank_util import degree_centrality_scores, find_siblings_by_index
|
7 |
from sentence_transformers import SentenceTransformer, util
|
8 |
|
9 |
|
|
|
28 |
# We argsort so that the first element is the sentence with the highest score
|
29 |
most_central_sentence_indices = numpy.argsort(-centrality_scores)
|
30 |
|
31 |
+
central_and_siblings = find_siblings_by_index(sentences, most_central_sentence_indices, siblings, num)
|
32 |
res = []
|
33 |
+
for index in central_and_siblings:
|
34 |
+
res.append(sentences[index])
|
|
|
|
|
|
|
35 |
return res
|
36 |
|
37 |
def contains_chinese(self, content: str):
|
lex_rank_util.py
CHANGED
@@ -124,9 +124,17 @@ def stationary_distribution(
|
|
124 |
return distribution
|
125 |
|
126 |
|
127 |
-
def
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
-
|
132 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
124 |
return distribution
|
125 |
|
126 |
|
127 |
+
def find_siblings_by_index(sentences: list[str], central_indices: list[int], siblings: int, num: int):
    """Collect sentence indices around the most central sentences.

    Walks `central_indices` in order (highest centrality first) and, for
    each one, includes the index itself plus up to `siblings` neighbouring
    indices on each side, clamped to the bounds of `sentences`. Indices
    are collected in first-seen order without duplicates.

    Args:
        sentences: the tokenized sentences; only its length is used here.
        central_indices: sentence indices sorted by descending centrality.
        siblings: number of neighbouring sentences to take on each side.
        num: budget of central sentences to expand; the loop stops when it
            is exhausted. NOTE(review): the `num < 0` guard expands one
            more central index than `num` — confirm whether `num <= 0`
            was the intended cutoff.

    Returns:
        A list of unique sentence indices, in the order they were added.
    """
    ret = []
    for idx in central_indices:
        if num < 0:
            break
        # Window [idx - siblings, idx + siblings], clamped to valid range.
        head = max(idx - siblings, 0)
        tail = min(idx + siblings + 1, len(sentences))
        for i in range(head, tail):
            if i not in ret:
                ret.append(i)
        num -= 1
    # Fixed: removed a stray debug `print(ret)` that polluted stdout on
    # every call from library code.
    return ret
|
test.py
ADDED
@@ -0,0 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
s = set()
|
2 |
+
for i in range
|
3 |
+
|
4 |
+
arr = [i for i in s]
|
5 |
+
|
6 |
+
print(type(arr))
|
7 |
+
arr.sort(reverse=True)
|
8 |
+
print(arr)
|