Upload README.md
Browse files
README.md
CHANGED
@@ -81,7 +81,7 @@ Using this model becomes easy when you have [sentence-transformers](https://www.
|
|
81 |
pip install -U sentence-transformers
|
82 |
```
|
83 |
|
84 |
-
|
85 |
|
86 |
```python
|
87 |
from sentence_transformers import SentenceTransformer
|
@@ -92,10 +92,57 @@ embeddings = model.encode(sentences)
|
|
92 |
print(embeddings)
|
93 |
```
|
94 |
|
95 |
### License
|
96 |
|
97 |
This project is licensed under the [MIT License](./LICENSE).
|
98 |
|
99 |
### Copyright
|
100 |
|
101 |
-
(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).
|
|
|
```sh
pip install -U sentence-transformers
```

### Embeddings

```python
from sentence_transformers import SentenceTransformer
# … (snippet lines 88–91 are elided in this diff view) …
print(embeddings)
```

### Advanced Usage

```python
from sentence_transformers import SentenceTransformer, util
import torch

# Define sentences in Igbo
sentences = [
    "Gịnị bụ olu obodo England?",
    "Kedu anụmanụ kachasị ọkụ n'ụwa?",
    "Olee otú e si amụta asụsụ Igbo?",
    "Gịnị bụ nri kachasị ewu ewu na Naịjirịa?",
    "Kedu ụdị uwe a na-eyi maka emume Igbo?"
]

# Load the Igbo-trained model
model = SentenceTransformer('0xnu/pmmlv2-fine-tuned-igbo')

# Compute embeddings
embeddings = model.encode(sentences, convert_to_tensor=True)

# Function to find the closest sentence
def find_closest_sentence(query_embedding, sentence_embeddings, sentences):
    # Compute cosine similarities
    cosine_scores = util.pytorch_cos_sim(query_embedding, sentence_embeddings)[0]
    # Find the position of the highest score
    best_match_index = torch.argmax(cosine_scores).item()
    return sentences[best_match_index], cosine_scores[best_match_index].item()

query = "Gịnị bụ olu obodo England?"
query_embedding = model.encode(query, convert_to_tensor=True)
closest_sentence, similarity_score = find_closest_sentence(query_embedding, embeddings, sentences)

print(f"Ajụjụ: {query}")
print(f"Ahịrịokwu yiri ya kachasị: {closest_sentence}")
print(f"Skọọ nyiri: {similarity_score:.4f}")

# You can also try with a new sentence not in the original list
new_query = "Kedu aha eze nọ n'obodo Enugwu?"
new_query_embedding = model.encode(new_query, convert_to_tensor=True)
closest_sentence, similarity_score = find_closest_sentence(new_query_embedding, embeddings, sentences)

print(f"\nAjụjụ ọhụrụ: {new_query}")
print(f"Ahịrịokwu yiri ya kachasị: {closest_sentence}")
print(f"Skọọ nyiri: {similarity_score:.4f}")
```

### License

This project is licensed under the [MIT License](./LICENSE).

### Copyright

(c) 2024 [Finbarrs Oketunji](https://finbarrs.eu).