Upload folder using huggingface_hub

- .gitattributes +1 -0
- 1_Pooling/config.json +10 -0
- README.md +431 -0
- config.json +50 -0
- config_sentence_transformers.json +10 -0
- model.safetensors +3 -0
- modules.json +20 -0
- sentence_bert_config.json +4 -0
- special_tokens_map.json +51 -0
- tokenizer.json +3 -0
- tokenizer_config.json +55 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
 *.zip filter=lfs diff=lfs merge=lfs -text
 *.zst filter=lfs diff=lfs merge=lfs -text
 *tfevents* filter=lfs diff=lfs merge=lfs -text
+tokenizer.json filter=lfs diff=lfs merge=lfs -text
1_Pooling/config.json ADDED
@@ -0,0 +1,10 @@
+{
+  "word_embedding_dimension": 768,
+  "pooling_mode_cls_token": true,
+  "pooling_mode_mean_tokens": false,
+  "pooling_mode_max_tokens": false,
+  "pooling_mode_mean_sqrt_len_tokens": false,
+  "pooling_mode_weightedmean_tokens": false,
+  "pooling_mode_lasttoken": false,
+  "include_prompt": true
+}
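Only `pooling_mode_cls_token` is enabled above, so the sentence embedding is simply the final hidden state of the CLS token. A minimal sketch of how sentence-transformers materializes this config (standard `models.Pooling` API; nothing here is specific to this repo):

```python
# Sketch: the pooling module this config describes. With only
# pooling_mode_cls_token set, the [CLS] hidden state becomes the
# sentence embedding; all token-aggregation modes stay off.
from sentence_transformers import models

pooling = models.Pooling(
    word_embedding_dimension=768,
    pooling_mode_cls_token=True,
    pooling_mode_mean_tokens=False,
    pooling_mode_max_tokens=False,
)
print(pooling.get_pooling_mode_str())  # "cls"
```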
README.md ADDED
@@ -0,0 +1,431 @@
+---
+tags:
+- sentence-transformers
+- sentence-similarity
+- feature-extraction
+- generated_from_trainer
+- dataset_size:25743
+- loss:MultipleNegativesRankingLoss
+base_model: Alibaba-NLP/gte-multilingual-base
+widget:
+- source_sentence: م الحين SHIA WAVES ENGLISH Indians throw thousands of idols on
+    the street for not protecting them against the virus |Many people in India have
+    thrown away statues of gods and blamed why the gods with infinite power cannot
+    protect them from the ravages of the coronavirus? All sects must be repositioned,
+    otherwise there will be a bigger crisis in each sect. . . I don't know, when will
+    it be our country's turn? you say?
+  sentences:
+  - Esta mulher sofreu uma convulsão após ser vacinada contra a covid-19 na Argentina
+  - Images of Hindu idols destroyed for not protecting Indian people during the Covid-19
+    pandemic
+  - Forces raid a house in Indian-administered Kashmir
+- source_sentence: 'En el mismo cuerpo legal atacaremos la raíz del problema: los
+    jefes de las mafias. Tipificaremos el nuevo delito de “autoría por dominio de
+    organización”. Es decir: los jefes de las bandas pagarán también por los delitos
+    que ordenen cometer a sus cómplices.'
+  sentences:
+  - Walmart va demander une preuve de vaccination à ses clients canadiens
+  - Vídeo mostra fraude de mortes na pandemia de Covid-19
+  - La autoría por dominio de organización sería un nuevo delito en Ecuador
+- source_sentence: Winning
+  sentences:
+  - President Donald Trump has 232 electoral votes, Joe Biden has 212, 226 or 227.
+  - Suspected drunk drivers automatically face one month in jail under new law in
+    Thailand?
+  - Le bilan des violences post-électorales à M'Batto a atteint au moins une trentaine
+    de morts
+- source_sentence: Pablo Iglesias Iglesias_ No soy partidario de la violencia pero
+    disfrutaría viendo como matan a tiros a los líderes del PP. La derecha debe ser
+    exterminada como un virus. 11:26 AM 24 ene. 12 1.682 Retweets 2.069 Likes 27 go
+  sentences:
+  - Pablo Iglesias tuiteó que disfrutaría de ver como matan de un tiro a líderes del
+    PP y a la derecha española habría que exterminarla como a un virus
+  - Delfines en un puerto de España durante el confinamiento
+  - Jenazah korban virus corona di Rusia
+- source_sentence: 'ليس داعشياً من بيده المسدس ..انه جندي فرنسي ينفذ اعدامات بحق مواطنين
+    عزل في الجزائر !!! لم يكن حينها لا تنظيم قاعدة ولا دولة اسلامية ولا نصرة ليلصقوا
+    بهم منفردين تهمة الارهاب !! انتم ام واب واخ وابن وجد الارهاب .. Not Daashaa of
+    the pistol in his hand .. he''s a French soldier executions carried out against
+    unarmed civilians in Algeria !!! If not then it does not regulate not base an
+    Islamic state nor a victory for Alsqoa their individual terrorism charge !! You
+    are a mother and father and brother and the son of terror found .. Non Daashaa
+    du pistolet dans sa main .. Il est un soldat français exécutions menées contre
+    des civils non armés en Algérie !!! Si non, alors il ne réglemente pas pas fonder
+    un Etat islamique, ni une victoire pour Alsqoa leur charge individuelle du terrorisme
+    !! Vous êtes une mère et père et le frère et le fils de la terreur trouvé .. #
+    occupant'
+  sentences:
+  - Massacre perpétré par des soldats français en Algérie
+  - Video Of Attack On UP Minister Shrikant Sharma
+  - Map shows there are no wildfires in Canada and Mexico
+pipeline_tag: sentence-similarity
+library_name: sentence-transformers
+---
+
+# SentenceTransformer based on Alibaba-NLP/gte-multilingual-base
+
+This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base). It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
+
+## Model Details
+
+### Model Description
+- **Model Type:** Sentence Transformer
+- **Base model:** [Alibaba-NLP/gte-multilingual-base](https://huggingface.co/Alibaba-NLP/gte-multilingual-base) <!-- at revision ca1791e0bcc104f6db161f27de1340241b13c5a4 -->
+- **Maximum Sequence Length:** 8192 tokens
+- **Output Dimensionality:** 768 dimensions
+- **Similarity Function:** Cosine Similarity
+<!-- - **Training Dataset:** Unknown -->
+<!-- - **Language:** Unknown -->
+<!-- - **License:** Unknown -->
+
+### Model Sources
+
+- **Documentation:** [Sentence Transformers Documentation](https://sbert.net)
+- **Repository:** [Sentence Transformers on GitHub](https://github.com/UKPLab/sentence-transformers)
+- **Hugging Face:** [Sentence Transformers on Hugging Face](https://huggingface.co/models?library=sentence-transformers)
+
+### Full Model Architecture
+
+```
+SentenceTransformer(
+  (0): Transformer({'max_seq_length': 8192, 'do_lower_case': False}) with Transformer model: NewModel
+  (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': True, 'pooling_mode_mean_tokens': False, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
+  (2): Normalize()
+)
+```
+
+## Usage
+
+### Direct Usage (Sentence Transformers)
+
+First install the Sentence Transformers library:
+
+```bash
+pip install -U sentence-transformers
+```
+
+Then you can load this model and run inference.
+```python
+from sentence_transformers import SentenceTransformer
+
+# Download from the 🤗 Hub
+model = SentenceTransformer("sentence_transformers_model_id")
+# Run inference
+sentences = [
+    "ليس داعشياً من بيده المسدس ..انه جندي فرنسي ينفذ اعدامات بحق مواطنين عزل في الجزائر !!! لم يكن حينها لا تنظيم قاعدة ولا دولة اسلامية ولا نصرة ليلصقوا بهم منفردين تهمة الارهاب !! انتم ام واب واخ وابن وجد الارهاب .. Not Daashaa of the pistol in his hand .. he's a French soldier executions carried out against unarmed civilians in Algeria !!! If not then it does not regulate not base an Islamic state nor a victory for Alsqoa their individual terrorism charge !! You are a mother and father and brother and the son of terror found .. Non Daashaa du pistolet dans sa main .. Il est un soldat français exécutions menées contre des civils non armés en Algérie !!! Si non, alors il ne réglemente pas pas fonder un Etat islamique, ni une victoire pour Alsqoa leur charge individuelle du terrorisme !! Vous êtes une mère et père et le frère et le fils de la terreur trouvé .. # occupant",
+    'Massacre perpétré par des soldats français en Algérie',
+    'Video Of Attack On UP Minister Shrikant Sharma',
+]
+embeddings = model.encode(sentences)
+print(embeddings.shape)
+# [3, 768]
+
+# Get the similarity scores for the embeddings
+similarities = model.similarity(embeddings, embeddings)
+print(similarities.shape)
+# [3, 3]
+```
+
+<!--
+### Direct Usage (Transformers)
+
+<details><summary>Click to see the direct usage in Transformers</summary>
+
+</details>
+-->
+
+<!--
+### Downstream Usage (Sentence Transformers)
+
+You can finetune this model on your own dataset.
+
+<details><summary>Click to expand</summary>
+
+</details>
+-->
+
+<!--
+### Out-of-Scope Use
+
+*List how the model may foreseeably be misused and address what users ought not to do with the model.*
+-->
+
+<!--
+## Bias, Risks and Limitations
+
+*What are the known or foreseeable issues stemming from this model? You could also flag here known failure cases or weaknesses of the model.*
+-->
+
+<!--
+### Recommendations
+
+*What are recommendations with respect to the foreseeable issues? For example, filtering explicit content.*
+-->
+
+## Training Details
+
+### Training Dataset
+
+#### Unnamed Dataset
+
+* Size: 25,743 training samples
+* Columns: <code>sentence_0</code>, <code>sentence_1</code>, and <code>label</code>
+* Approximate statistics based on the first 1000 samples:
+  |         | sentence_0 | sentence_1 | label |
+  |:--------|:-----------|:-----------|:------|
+  | type    | string     | string     | float |
+  | details | <ul><li>min: 2 tokens</li><li>mean: 140.38 tokens</li><li>max: 2514 tokens</li></ul> | <ul><li>min: 5 tokens</li><li>mean: 20.49 tokens</li><li>max: 141 tokens</li></ul> | <ul><li>min: 1.0</li><li>mean: 1.0</li><li>max: 1.0</li></ul> |
+* Samples:
+  | sentence_0 | sentence_1 | label |
+  |:-----------|:-----------|:------|
+  | <code>Olhem aí a mineradora da Noruega destruindo o meio ambiente na Amazônia. Lula vendeu o solo para a Noruega em documento secreto. Ela arrecada 2 bilhoes ao ano e devolve 180 milhoes para consertar o estrago que ela mesmo faz na Amazônia.</code> | <code>O ex-presidente Lula vendeu o solo da Amazônia para uma empresa norueguesa</code> | <code>1.0</code> |
+  | <code>EL CONGRESO DANIE Cometió una burrada Al aprobar en primera votación con 113 votos a favor, 5 en contra y una abstención, que la vacuna contra el coronavirus sea de manera OBLIGATORIA para todos Que les pasa a estos genios de la política, acaso no saben que están violando leyes universales de Derechos Humanos¿Qué les pasa a estos congresistas?. . ¿ Acaso desconocen y pisotean las leyes internacionales que respaldan los Derechos Humanos Universales ???. . Absolutamente nadie puede ser obligado a vacunarse. . Igualmente, ningún procedimiento médico puede hacerse sin el consentimiento del paciente. . No lo digo yo, lo dice la UNESCO,la Organización de las Naciones Unidas para la Educación, la Ciencia y la Cultura.... Que en sus normativas explican lo siguiente : . SOLO UNO MISMO TIENE EL CONTROL DE SU PROPIO CUERPO, nadie tiene el control de nuestro cuerpo más que uno mismo, nadie puede intervenir en nuestro cuerpo bajo ninguna circunstancia sin nuestro consentimiento. . Legalmente bajo t...</code> | <code>En Perú el Congreso aprobó que la vacuna contra el covid-19 sea obligatoria</code> | <code>1.0</code> |
+  | <code>Why changes to Legislation is so difficult. Debating PTSD in Emergency Services Debating Mental Health Stigma Debating Workers Compensation Debating Cancer Legislation for Firefighters Debating MP's Pay Debating PFAS Contamination Debating Suicide Figures in Australia Debating MP's AllowancesThis tells us everything we need to know about this Government’s priorities.</code> | <code>Accurate description of photos showing the difference in attendance in various parliamentary sessions in Australia</code> | <code>1.0</code> |
+* Loss: [<code>MultipleNegativesRankingLoss</code>](https://sbert.net/docs/package_reference/sentence_transformer/losses.html#multiplenegativesrankingloss) with these parameters:
+  ```json
+  {
+      "scale": 20.0,
+      "similarity_fct": "cos_sim"
+  }
+  ```
+
+### Training Hyperparameters
+#### Non-Default Hyperparameters
+
+- `per_device_train_batch_size`: 1
+- `per_device_eval_batch_size`: 1
+- `num_train_epochs`: 1
+- `multi_dataset_batch_sampler`: round_robin
+
+#### All Hyperparameters
+<details><summary>Click to expand</summary>
+
+- `overwrite_output_dir`: False
+- `do_predict`: False
+- `eval_strategy`: no
+- `prediction_loss_only`: True
+- `per_device_train_batch_size`: 1
+- `per_device_eval_batch_size`: 1
+- `per_gpu_train_batch_size`: None
+- `per_gpu_eval_batch_size`: None
+- `gradient_accumulation_steps`: 1
+- `eval_accumulation_steps`: None
+- `torch_empty_cache_steps`: None
+- `learning_rate`: 5e-05
+- `weight_decay`: 0.0
+- `adam_beta1`: 0.9
+- `adam_beta2`: 0.999
+- `adam_epsilon`: 1e-08
+- `max_grad_norm`: 1
+- `num_train_epochs`: 1
+- `max_steps`: -1
+- `lr_scheduler_type`: linear
+- `lr_scheduler_kwargs`: {}
+- `warmup_ratio`: 0.0
+- `warmup_steps`: 0
+- `log_level`: passive
+- `log_level_replica`: warning
+- `log_on_each_node`: True
+- `logging_nan_inf_filter`: True
+- `save_safetensors`: True
+- `save_on_each_node`: False
+- `save_only_model`: False
+- `restore_callback_states_from_checkpoint`: False
+- `no_cuda`: False
+- `use_cpu`: False
+- `use_mps_device`: False
+- `seed`: 42
+- `data_seed`: None
+- `jit_mode_eval`: False
+- `use_ipex`: False
+- `bf16`: False
+- `fp16`: False
+- `fp16_opt_level`: O1
+- `half_precision_backend`: auto
+- `bf16_full_eval`: False
+- `fp16_full_eval`: False
+- `tf32`: None
+- `local_rank`: 0
+- `ddp_backend`: None
+- `tpu_num_cores`: None
+- `tpu_metrics_debug`: False
+- `debug`: []
+- `dataloader_drop_last`: False
+- `dataloader_num_workers`: 0
+- `dataloader_prefetch_factor`: None
+- `past_index`: -1
+- `disable_tqdm`: False
+- `remove_unused_columns`: True
+- `label_names`: None
+- `load_best_model_at_end`: False
+- `ignore_data_skip`: False
+- `fsdp`: []
+- `fsdp_min_num_params`: 0
+- `fsdp_config`: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
+- `fsdp_transformer_layer_cls_to_wrap`: None
+- `accelerator_config`: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'non_blocking': False, 'gradient_accumulation_kwargs': None}
+- `deepspeed`: None
+- `label_smoothing_factor`: 0.0
+- `optim`: adamw_torch
+- `optim_args`: None
+- `adafactor`: False
+- `group_by_length`: False
+- `length_column_name`: length
+- `ddp_find_unused_parameters`: None
+- `ddp_bucket_cap_mb`: None
+- `ddp_broadcast_buffers`: False
+- `dataloader_pin_memory`: True
+- `dataloader_persistent_workers`: False
+- `skip_memory_metrics`: True
+- `use_legacy_prediction_loop`: False
+- `push_to_hub`: False
+- `resume_from_checkpoint`: None
+- `hub_model_id`: None
+- `hub_strategy`: every_save
+- `hub_private_repo`: None
+- `hub_always_push`: False
+- `gradient_checkpointing`: False
+- `gradient_checkpointing_kwargs`: None
+- `include_inputs_for_metrics`: False
+- `include_for_metrics`: []
+- `eval_do_concat_batches`: True
+- `fp16_backend`: auto
+- `push_to_hub_model_id`: None
+- `push_to_hub_organization`: None
+- `mp_parameters`:
+- `auto_find_batch_size`: False
+- `full_determinism`: False
+- `torchdynamo`: None
+- `ray_scope`: last
+- `ddp_timeout`: 1800
+- `torch_compile`: False
+- `torch_compile_backend`: None
+- `torch_compile_mode`: None
+- `dispatch_batches`: None
+- `split_batches`: None
+- `include_tokens_per_second`: False
+- `include_num_input_tokens_seen`: False
+- `neftune_noise_alpha`: None
+- `optim_target_modules`: None
+- `batch_eval_metrics`: False
+- `eval_on_start`: False
+- `use_liger_kernel`: False
+- `eval_use_gather_object`: False
+- `average_tokens_across_devices`: False
+- `prompts`: None
+- `batch_sampler`: batch_sampler
+- `multi_dataset_batch_sampler`: round_robin
+
+</details>
+
+### Training Logs
+| Epoch | Step | Training Loss |
+|:------:|:-----:|:-------------:|
+| 0.0194 | 500 | 0.0 |
+| 0.0388 | 1000 | 0.0 |
+| 0.0583 | 1500 | 0.0 |
+| 0.0777 | 2000 | 0.0 |
+| 0.0971 | 2500 | 0.0 |
+| 0.1165 | 3000 | 0.0 |
+| 0.1360 | 3500 | 0.0 |
+| 0.1554 | 4000 | 0.0 |
+| 0.1748 | 4500 | 0.0 |
+| 0.1942 | 5000 | 0.0 |
+| 0.2137 | 5500 | 0.0 |
+| 0.2331 | 6000 | 0.0 |
+| 0.2525 | 6500 | 0.0 |
+| 0.2719 | 7000 | 0.0 |
+| 0.2913 | 7500 | 0.0 |
+| 0.3108 | 8000 | 0.0 |
+| 0.3302 | 8500 | 0.0 |
+| 0.3496 | 9000 | 0.0 |
+| 0.3690 | 9500 | 0.0 |
+| 0.3885 | 10000 | 0.0 |
+| 0.4079 | 10500 | 0.0 |
+| 0.4273 | 11000 | 0.0 |
+| 0.4467 | 11500 | 0.0 |
+| 0.4661 | 12000 | 0.0 |
+| 0.4856 | 12500 | 0.0 |
+| 0.5050 | 13000 | 0.0 |
+| 0.5244 | 13500 | 0.0 |
+| 0.5438 | 14000 | 0.0 |
+| 0.5633 | 14500 | 0.0 |
+| 0.5827 | 15000 | 0.0 |
+| 0.6021 | 15500 | 0.0 |
+| 0.6215 | 16000 | 0.0 |
+| 0.6410 | 16500 | 0.0 |
+| 0.6604 | 17000 | 0.0 |
+| 0.6798 | 17500 | 0.0 |
+| 0.6992 | 18000 | 0.0 |
+| 0.7186 | 18500 | 0.0 |
+| 0.7381 | 19000 | 0.0 |
+| 0.7575 | 19500 | 0.0 |
+| 0.7769 | 20000 | 0.0 |
+| 0.7963 | 20500 | 0.0 |
+| 0.8158 | 21000 | 0.0 |
+| 0.8352 | 21500 | 0.0 |
+| 0.8546 | 22000 | 0.0 |
+| 0.8740 | 22500 | 0.0 |
+| 0.8934 | 23000 | 0.0 |
+| 0.9129 | 23500 | 0.0 |
+| 0.9323 | 24000 | 0.0 |
+| 0.9517 | 24500 | 0.0 |
+| 0.9711 | 25000 | 0.0 |
+| 0.9906 | 25500 | 0.0 |
+
+
+### Framework Versions
+- Python: 3.11.11
+- Sentence Transformers: 3.4.1
+- Transformers: 4.48.3
+- PyTorch: 2.5.1+cu124
+- Accelerate: 1.3.0
+- Datasets: 3.3.1
+- Tokenizers: 0.21.0
+
+## Citation
+
+### BibTeX
+
+#### Sentence Transformers
+```bibtex
+@inproceedings{reimers-2019-sentence-bert,
+    title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
+    author = "Reimers, Nils and Gurevych, Iryna",
+    booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
+    month = "11",
+    year = "2019",
+    publisher = "Association for Computational Linguistics",
+    url = "https://arxiv.org/abs/1908.10084",
+}
+```
+
+#### MultipleNegativesRankingLoss
+```bibtex
+@misc{henderson2017efficient,
+    title={Efficient Natural Language Response Suggestion for Smart Reply},
+    author={Matthew Henderson and Rami Al-Rfou and Brian Strope and Yun-hsuan Sung and Laszlo Lukacs and Ruiqi Guo and Sanjiv Kumar and Balint Miklos and Ray Kurzweil},
+    year={2017},
+    eprint={1705.00652},
+    archivePrefix={arXiv},
+    primaryClass={cs.CL}
+}
+```
+
+<!--
+## Glossary
+
+*Clearly define terms in order to be accessible across audiences.*
+-->
+
+<!--
+## Model Card Authors
+
+*Lists the people who create the model card, providing recognition and accountability for the detailed work that goes into its construction.*
+-->
+
+<!--
+## Model Card Contact
+
+*Provides a way for people who have updates to the Model Card, suggestions, or questions, to contact the Model Card authors.*
+-->
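For context on the card above: `MultipleNegativesRankingLoss` uses the other in-batch `sentence_1` entries as negatives, so with `per_device_train_batch_size: 1` each anchor is scored against its own positive alone and the cross-entropy term is identically zero, which is consistent with the flat 0.0 values in the training-log table. A hypothetical sketch of the training setup the card describes (the dataset rows and output directory below are placeholders, not the real data):

```python
# Sketch reconstructing the training run described in the model card:
# MultipleNegativesRankingLoss (scale=20.0, cosine similarity), 1 epoch.
from datasets import Dataset
from sentence_transformers import (
    SentenceTransformer,
    SentenceTransformerTrainer,
    SentenceTransformerTrainingArguments,
    losses,
)

model = SentenceTransformer("Alibaba-NLP/gte-multilingual-base", trust_remote_code=True)

# Placeholder pairs; the real dataset has 25,743 (sentence_0, sentence_1) rows.
train_dataset = Dataset.from_dict({
    "sentence_0": ["a social-media claim ...", "another claim ..."],
    "sentence_1": ["a matching fact-check title", "another matching title"],
})

loss = losses.MultipleNegativesRankingLoss(model, scale=20.0)  # cos_sim is the default

args = SentenceTransformerTrainingArguments(
    output_dir="output",            # placeholder
    num_train_epochs=1,
    per_device_train_batch_size=1,  # as logged; leaves MNRL with no in-batch negatives
)

trainer = SentenceTransformerTrainer(
    model=model, args=args, train_dataset=train_dataset, loss=loss
)
trainer.train()
```

A larger batch size would presumably be needed for the contrastive signal to be non-trivial.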
config.json ADDED
@@ -0,0 +1,50 @@
+{
+  "_name_or_path": "Alibaba-NLP/gte-multilingual-base",
+  "architectures": [
+    "NewModel"
+  ],
+  "attention_probs_dropout_prob": 0.0,
+  "auto_map": {
+    "AutoConfig": "Alibaba-NLP/new-impl--configuration.NewConfig",
+    "AutoModel": "Alibaba-NLP/new-impl--modeling.NewModel",
+    "AutoModelForMaskedLM": "Alibaba-NLP/new-impl--modeling.NewForMaskedLM",
+    "AutoModelForMultipleChoice": "Alibaba-NLP/new-impl--modeling.NewForMultipleChoice",
+    "AutoModelForQuestionAnswering": "Alibaba-NLP/new-impl--modeling.NewForQuestionAnswering",
+    "AutoModelForSequenceClassification": "Alibaba-NLP/new-impl--modeling.NewForSequenceClassification",
+    "AutoModelForTokenClassification": "Alibaba-NLP/new-impl--modeling.NewForTokenClassification"
+  },
+  "classifier_dropout": 0.0,
+  "hidden_act": "gelu",
+  "hidden_dropout_prob": 0.1,
+  "hidden_size": 768,
+  "id2label": {
+    "0": "LABEL_0"
+  },
+  "initializer_range": 0.02,
+  "intermediate_size": 3072,
+  "label2id": {
+    "LABEL_0": 0
+  },
+  "layer_norm_eps": 1e-12,
+  "layer_norm_type": "layer_norm",
+  "logn_attention_clip1": false,
+  "logn_attention_scale": false,
+  "max_position_embeddings": 8192,
+  "model_type": "new",
+  "num_attention_heads": 12,
+  "num_hidden_layers": 12,
+  "pack_qkv": true,
+  "pad_token_id": 1,
+  "position_embedding_type": "rope",
+  "rope_scaling": {
+    "factor": 8.0,
+    "type": "ntk"
+  },
+  "rope_theta": 20000,
+  "torch_dtype": "float32",
+  "transformers_version": "4.48.3",
+  "type_vocab_size": 1,
+  "unpad_inputs": false,
+  "use_memory_efficient_attention": false,
+  "vocab_size": 250048
+}
config_sentence_transformers.json ADDED
@@ -0,0 +1,10 @@
+{
+  "__version__": {
+    "sentence_transformers": "3.4.1",
+    "transformers": "4.48.3",
+    "pytorch": "2.5.1+cu124"
+  },
+  "prompts": {},
+  "default_prompt_name": null,
+  "similarity_fn_name": "cosine"
+}
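The fields above govern load-time behavior: `similarity_fn_name` selects what `model.similarity()` computes, and with `prompts` empty and `default_prompt_name` null, `encode()` prepends nothing to inputs. A small sketch (the repo id is the README's placeholder, not a confirmed name):

```python
# Sketch: how these settings surface on a loaded model.
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("sentence_transformers_model_id", trust_remote_code=True)
print(model.similarity_fn_name)  # "cosine", per similarity_fn_name above

emb = model.encode(["hello", "world"])  # no prompt applied: prompts == {}
print(model.similarity(emb, emb))       # cosine similarity matrix
```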
model.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:b77dc57d2d3fe98ee7e755658de6c27b3ede5fe898bcd3e9cc30991af8743ca7
+size 1221487872
modules.json ADDED
@@ -0,0 +1,20 @@
+[
+  {
+    "idx": 0,
+    "name": "0",
+    "path": "",
+    "type": "sentence_transformers.models.Transformer"
+  },
+  {
+    "idx": 1,
+    "name": "1",
+    "path": "1_Pooling",
+    "type": "sentence_transformers.models.Pooling"
+  },
+  {
+    "idx": 2,
+    "name": "2",
+    "path": "2_Normalize",
+    "type": "sentence_transformers.models.Normalize"
+  }
+]
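modules.json is what chains the serialized folders into the `Transformer → Pooling → Normalize` pipeline shown in the README's architecture dump. A rough hand-assembled equivalent, assuming the standard sentence-transformers module classes (the final `Normalize()` is why cosine similarity and dot product agree for this model):

```python
# Sketch: assembling the three-module pipeline that modules.json encodes.
from sentence_transformers import SentenceTransformer, models

transformer = models.Transformer(
    "Alibaba-NLP/gte-multilingual-base",
    max_seq_length=8192,
    model_args={"trust_remote_code": True},   # base model ships custom "NewModel" code
    config_args={"trust_remote_code": True},
)
pooling = models.Pooling(transformer.get_word_embedding_dimension(), pooling_mode="cls")
normalize = models.Normalize()  # L2-normalizes embeddings to unit length

model = SentenceTransformer(modules=[transformer, pooling, normalize])
```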
sentence_bert_config.json ADDED
@@ -0,0 +1,4 @@
+{
+  "max_seq_length": 8192,
+  "do_lower_case": false
+}
special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}
tokenizer.json ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:aa7a6ad87a7ce8fe196787355f6af7d03aee94d19c54a5eb1392ed18c8ef451a
+size 17082988
tokenizer_config.json ADDED
@@ -0,0 +1,55 @@
+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "250001": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "extra_special_tokens": {},
+  "mask_token": "<mask>",
+  "model_max_length": 8192,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "XLMRobertaTokenizer",
+  "unk_token": "<unk>"
+}
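As a sanity check on the two files above, the special tokens follow the stock XLM-RoBERTa layout. A small sketch, assuming the base model's tokenizer is identical to the one uploaded here:

```python
# Sketch: inspecting the special tokens declared in tokenizer_config.json.
from transformers import AutoTokenizer

tok = AutoTokenizer.from_pretrained("Alibaba-NLP/gte-multilingual-base")
print(tok.cls_token, tok.sep_token, tok.pad_token, tok.mask_token)  # <s> </s> <pad> <mask>
print(tok.model_max_length)  # expected 8192, per model_max_length above
```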