Daniele Licari
committed on
Commit
·
f1c8288
1
Parent(s):
8c90089
Update README.md
Browse files
README.md
CHANGED
@@ -75,15 +75,14 @@ def sentence_embeddings(sentences, model_name, max_length=512):
|
|
75 |
|
76 |
|
77 |
def plot_similarity(sentences, model_name):
|
78 |
-
#
|
79 |
embeddings = sentence_embeddings(sentences, model_name)
|
80 |
-
#
|
81 |
corr = cosine_similarity(embeddings, embeddings)
|
82 |
|
83 |
-
#
|
84 |
-
sns.set(font_scale=1.2)
|
85 |
-
# for text axis labels wrapping
|
86 |
-
labels = [ '\n'.join(wrap(l, 40)) for l in sentences]
|
87 |
g = sns.heatmap(
|
88 |
corr,
|
89 |
xticklabels=labels,
|
@@ -95,7 +94,7 @@ def plot_similarity(sentences, model_name):
|
|
95 |
g.set_title(f"Semantic Textual Similarity ({model_short_name})")
|
96 |
plt.show()
|
97 |
|
98 |
-
|
99 |
sent = [
|
100 |
# 1. "The court shall pronounce the judgment for the dissolution or termination of the civil effects of marriage."
|
101 |
"Il tribunale pronuncia la sentenza per lo scioglimento o la cessazione degli effetti civili del matrimonio",
|
@@ -108,9 +107,11 @@ sent = [
|
|
108 |
"Il ricorrente ha perso la causa"
|
109 |
]
|
110 |
|
111 |
-
|
112 |
model_name = "dlicari/Italian-Legal-BERT"
|
113 |
plot_similarity(sent, model_name)
|
|
|
|
|
114 |
model_name = 'dbmdz/bert-base-italian-xxl-cased'
|
115 |
plot_similarity(sent, model_name)
|
116 |
```
|
|
|
75 |
|
76 |
|
77 |
def plot_similarity(sentences, model_name):
|
78 |
+
# Get sentence embeddings produced by the model
|
79 |
embeddings = sentence_embeddings(sentences, model_name)
|
80 |
+
# Compute similarity scores using cosine similarity
|
81 |
corr = cosine_similarity(embeddings, embeddings)
|
82 |
|
83 |
+
# Plot the similarity heatmap
|
84 |
+
sns.set(font_scale=1.2)
|
85 |
+
labels = [ '\n'.join(wrap(l, 40)) for l in sentences] # for text axis labels wrapping
|
|
|
86 |
g = sns.heatmap(
|
87 |
corr,
|
88 |
xticklabels=labels,
|
|
|
94 |
g.set_title(f"Semantic Textual Similarity ({model_short_name})")
|
95 |
plt.show()
|
96 |
|
97 |
+
# Sentences to be compared
|
98 |
sent = [
|
99 |
# 1. "The court shall pronounce the judgment for the dissolution or termination of the civil effects of marriage."
|
100 |
"Il tribunale pronuncia la sentenza per lo scioglimento o la cessazione degli effetti civili del matrimonio",
|
|
|
107 |
"Il ricorrente ha perso la causa"
|
108 |
]
|
109 |
|
110 |
+
# Perform Semantic Textual Similarity using 'Italian-Legal-BERT'
|
111 |
model_name = "dlicari/Italian-Legal-BERT"
|
112 |
plot_similarity(sent, model_name)
|
113 |
+
|
114 |
+
# Perform Semantic Textual Similarity using 'bert-base-italian-xxl-cased'
|
115 |
model_name = 'dbmdz/bert-base-italian-xxl-cased'
|
116 |
plot_similarity(sent, model_name)
|
117 |
```
|