metadata
model-index:
- name: Gameselo/STS-multilingual-mpnet-base-v2
results:
- dataset:
config: it
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6847049462613332
task:
type: STS
- dataset:
config: es
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6620948502618977
task:
type: STS
- dataset:
config: fr
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.7875616631597785
task:
type: STS
- dataset:
config: pl-en
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.7510805416538202
task:
type: STS
- dataset:
config: ar
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6265329479575293
task:
type: STS
- dataset:
config: pl
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.4335552432730643
task:
type: STS
- dataset:
config: de
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.5774252131250034
task:
type: STS
- dataset:
config: tr
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6383757017928495
task:
type: STS
- dataset:
config: es-it
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6624635951676386
task:
type: STS
- dataset:
config: ru
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.5866853707548388
task:
type: STS
- dataset:
config: en
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6385354535483773
task:
type: STS
- dataset:
config: zh-en
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6537294853166558
task:
type: STS
- dataset:
config: zh
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.6319430830291571
task:
type: STS
- dataset:
config: fr-pl
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.8451542547285167
task:
type: STS
- dataset:
config: de-fr
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.5798716781400349
task:
type: STS
- dataset:
config: es-en
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.7518021273920814
task:
type: STS
- dataset:
config: de-en
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.5749790581441845
task:
type: STS
- dataset:
config: de-pl
name: MTEB STS22
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3
split: test
type: mteb/sts22-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.44220332625465214
task:
type: STS
- dataset:
config: default
name: MTEB STSBenchmark
revision: b0fddb56ed78048fa8b90373c8a3cfc37b684831
split: test
type: mteb/stsbenchmark-sts
metrics:
- type: cosine_spearman
value: 0.9762486352335524
task:
type: STS
- dataset:
config: en-tr
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.7987027653005363
task:
type: STS
- dataset:
config: ko-ko
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9766336939338607
task:
type: STS
- dataset:
config: fr-en
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9067607122592818
task:
type: STS
- dataset:
config: en-ar
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.7703365842088069
task:
type: STS
- dataset:
config: nl-en
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9114826394926738
task:
type: STS
- dataset:
config: it-en
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9246785886944904
task:
type: STS
- dataset:
config: ar-ar
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.8124393788492182
task:
type: STS
- dataset:
config: es-es
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.872701191632785
task:
type: STS
- dataset:
config: en-de
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9109414091487618
task:
type: STS
- dataset:
config: es-en
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.8553203530552356
task:
type: STS
- dataset:
config: en-en
name: MTEB STS17
revision: faeb762787bd10488a50c8b5be4a3b82e411949c
split: test
type: mteb/sts17-crosslingual-sts
metrics:
- type: cosine_spearman
value: 0.9378741534997558
task:
type: STS
language: []
library_name: sentence-transformers
tags:
- mteb
- sentence-transformers
- sentence-similarity
- feature-extraction
- dataset_size:100K<n<1M
- loss:AnglELoss
base_model: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
widget:
- source_sentence: 有些人在路上溜达。
sentences:
- Folk går
- Otururken gitar çalan adam.
- ארה"ב קבעה שסוריה השתמשה בנשק כימי
- source_sentence: 緬甸以前稱為緬甸。
sentences:
- 缅甸以前叫缅甸。
- This is very contradictory.
- 한 남자가 아기를 안고 의자에 앉아 잠들어 있다.
- source_sentence: אדם כותב.
sentences:
- האדם כותב.
- questa non è una risposta.
- 7 שוטרים נהרגו ו-4 שוטרים נפצעו.
- source_sentence: הם מפחדים.
sentences:
- liên quan đến rủi ro đáng kể;
- A man is playing a guitar.
- A man is playing a piano.
- source_sentence: 一个女人正在洗澡。
sentences:
- A woman is taking a bath.
- En jente børster håret sitt
- אדם מחלק תפוח אדמה.
pipeline_tag: sentence-similarity
State-of-the-Art Results Comparison (MTEB STS Multilingual Leaderboard)
Dataset | State-of-the-art (Multi) | STSb-XLM-RoBERTa-base | STS Multilingual MPNet base v2 |
---|---|---|---|
Average | 73.17 | 71.68 | 73.89 |
STS17 (ar-ar) | 81.87 | 80.43 | 81.24 |
STS17 (en-ar) | 81.22 | 76.3 | 77.03 |
STS17 (en-de) | 87.3 | 91.06 | 91.09 |
STS17 (en-tr) | 77.18 | 80.74 | 79.87 |
STS17 (es-en) | 88.24 | 83.09 | 85.53 |
STS17 (es-es) | 88.25 | 84.16 | 87.27 |
STS17 (fr-en) | 88.06 | 91.33 | 90.68 |
STS17 (it-en) | 89.68 | 92.87 | 92.47 |
STS17 (ko-ko) | 83.69 | 97.67 | 97.66 |
STS17 (nl-en) | 88.25 | 92.13 | 91.15 |
STS22 (ar) | 58.67 | 58.67 | 62.66 |
STS22 (de) | 60.12 | 52.17 | 57.74 |
STS22 (de-en) | 60.92 | 58.5 | 57.5 |
STS22 (de-fr) | 67.79 | 51.28 | 57.99 |
STS22 (de-pl) | 58.69 | 44.56 | 44.22 |
STS22 (es) | 68.57 | 63.68 | 66.21 |
STS22 (es-en) | 78.8 | 70.65 | 75.18 |
STS22 (es-it) | 75.04 | 60.88 | 66.25 |
STS22 (fr) | 83.75 | 76.46 | 78.76 |
STS22 (fr-pl) | 84.52 | 84.52 | 84.52 |
STS22 (it) | 79.28 | 66.73 | 68.47 |
STS22 (pl) | 42.08 | 41.18 | 43.36 |
STS22 (pl-en) | 77.5 | 64.35 | 75.11 |
STS22 (ru) | 61.71 | 58.59 | 58.67 |
STS22 (tr) | 68.72 | 57.52 | 63.84 |
STS22 (zh-en) | 71.88 | 60.69 | 65.37 |
STSb | 89.86 | 95.05 | 95.15 |
Bold indicates the best result in each row.
SentenceTransformer based on sentence-transformers/paraphrase-multilingual-mpnet-base-v2
This is a sentence-transformers model finetuned from sentence-transformers/paraphrase-multilingual-mpnet-base-v2. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
Model Details
Model Description
- Model Type: Sentence Transformer
- Base model: sentence-transformers/paraphrase-multilingual-mpnet-base-v2
- Maximum Sequence Length: 128 tokens
- Output Dimensionality: 768 dimensions
- Similarity Function: Cosine Similarity
Model Sources
- Documentation: Sentence Transformers Documentation
- Repository: Sentence Transformers on GitHub
- Hugging Face: Sentence Transformers on Hugging Face
Full Model Architecture
SentenceTransformer(
(0): Transformer({'max_seq_length': 128, 'do_lower_case': False}) with Transformer model: XLMRobertaModel
(1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
)
Usage
Direct Usage (Sentence Transformers)
First install the Sentence Transformers library:
pip install -U sentence-transformers
Then you can load this model and run inference.
from sentence_transformers import SentenceTransformer
# Download from the 🤗 Hub
model = SentenceTransformer("Gameselo/STS-multilingual-mpnet-base-v2")
# Run inference
sentences = [
'一个女人正在洗澡。',
'A woman is taking a bath.',
'En jente børster håret sitt',
]
embeddings = model.encode(sentences)
print(embeddings.shape)
# [3, 768]
# Get the similarity scores for the embeddings
similarities = model.similarity(embeddings, embeddings)
print(similarities.shape)
# [3, 3]
Evaluation
Metrics
Semantic Similarity
- Dataset: sts-dev
- Evaluated with EmbeddingSimilarityEvaluator
Metric | Value |
---|---|
pearson_cosine | 0.9551 |
spearman_cosine | 0.9593 |
pearson_manhattan | 0.927 |
spearman_manhattan | 0.9383 |
pearson_euclidean | 0.9278 |
spearman_euclidean | 0.9394 |
pearson_dot | 0.876 |
spearman_dot | 0.8865 |
pearson_max | 0.9551 |
spearman_max | 0.9593 |
Evaluation results vs SOTA results
- Dataset: sts-test
- Evaluated with EmbeddingSimilarityEvaluator
Metric | Value |
---|---|
pearson_cosine | 0.948 |
spearman_cosine | 0.9515 |
pearson_manhattan | 0.9252 |
spearman_manhattan | 0.9352 |
pearson_euclidean | 0.9258 |
spearman_euclidean | 0.9364 |
pearson_dot | 0.8443 |
spearman_dot | 0.8435 |
pearson_max | 0.948 |
spearman_max | 0.9515 |
Training Details
Training Dataset
Unnamed Dataset
- Size: 226,547 training samples
- Columns: sentence_0, sentence_1, and label
- Approximate statistics based on the first 1000 samples:
 | sentence_0 | sentence_1 | label |
---|---|---|---|
type | string | string | float |
min | 3 tokens | 4 tokens | 0.0 |
mean | 20.05 tokens | 19.94 tokens | 1.92 |
max | 128 tokens | 128 tokens | 398.6 |
- Samples:
sentence_0 | sentence_1 | label |
---|---|---|
Bir kadın makineye dikiş dikiyor. | Bir kadın biraz et ekiyor. | 0.12 |
Snowden 'gegeven vluchtelingendocument door Ecuador'. | Snowden staat op het punt om uit Moskou te vliegen | 0.24000000953674316 |
Czarny pies idzie mostem przez wodę | Czarny pies nie idzie mostem przez wodę | 0.74000000954 |
- Loss: AnglELoss with these parameters: { "scale": 20.0, "similarity_fct": "pairwise_angle_sim" }
Training Hyperparameters
Non-Default Hyperparameters
- per_device_train_batch_size: 256
- per_device_eval_batch_size: 256
- num_train_epochs: 10
- multi_dataset_batch_sampler: round_robin
All Hyperparameters
Click to expand
- overwrite_output_dir: False
- do_predict: False
- prediction_loss_only: True
- per_device_train_batch_size: 256
- per_device_eval_batch_size: 256
- per_gpu_train_batch_size: None
- per_gpu_eval_batch_size: None
- gradient_accumulation_steps: 1
- eval_accumulation_steps: None
- learning_rate: 5e-05
- weight_decay: 0.0
- adam_beta1: 0.9
- adam_beta2: 0.999
- adam_epsilon: 1e-08
- max_grad_norm: 1
- num_train_epochs: 10
- max_steps: -1
- lr_scheduler_type: linear
- lr_scheduler_kwargs: {}
- warmup_ratio: 0.0
- warmup_steps: 0
- log_level: passive
- log_level_replica: warning
- log_on_each_node: True
- logging_nan_inf_filter: True
- save_safetensors: True
- save_on_each_node: False
- save_only_model: False
- no_cuda: False
- use_cpu: False
- use_mps_device: False
- seed: 42
- data_seed: None
- jit_mode_eval: False
- use_ipex: False
- bf16: False
- fp16: False
- fp16_opt_level: O1
- half_precision_backend: auto
- bf16_full_eval: False
- fp16_full_eval: False
- tf32: None
- local_rank: 0
- ddp_backend: None
- tpu_num_cores: None
- tpu_metrics_debug: False
- debug: []
- dataloader_drop_last: False
- dataloader_num_workers: 0
- dataloader_prefetch_factor: None
- past_index: -1
- disable_tqdm: False
- remove_unused_columns: True
- label_names: None
- load_best_model_at_end: False
- ignore_data_skip: False
- fsdp: []
- fsdp_min_num_params: 0
- fsdp_config: {'min_num_params': 0, 'xla': False, 'xla_fsdp_v2': False, 'xla_fsdp_grad_ckpt': False}
- fsdp_transformer_layer_cls_to_wrap: None
- accelerator_config: {'split_batches': False, 'dispatch_batches': None, 'even_batches': True, 'use_seedable_sampler': True, 'gradient_accumulation_kwargs': None}
- deepspeed: None
- label_smoothing_factor: 0.0
- optim: adamw_torch
- optim_args: None
- adafactor: False
- group_by_length: False
- length_column_name: length
- ddp_find_unused_parameters: None
- ddp_bucket_cap_mb: None
- ddp_broadcast_buffers: False
- dataloader_pin_memory: True
- dataloader_persistent_workers: False
- skip_memory_metrics: True
- use_legacy_prediction_loop: False
- push_to_hub: False
- resume_from_checkpoint: None
- hub_model_id: None
- hub_strategy: every_save
- hub_private_repo: False
- hub_always_push: False
- gradient_checkpointing: False
- gradient_checkpointing_kwargs: None
- include_inputs_for_metrics: False
- eval_do_concat_batches: True
- fp16_backend: auto
- push_to_hub_model_id: None
- push_to_hub_organization: None
- mp_parameters:
- auto_find_batch_size: False
- full_determinism: False
- torchdynamo: None
- ray_scope: last
- ddp_timeout: 1800
- torch_compile: False
- torch_compile_backend: None
- torch_compile_mode: None
- dispatch_batches: None
- split_batches: None
- include_tokens_per_second: False
- include_num_input_tokens_seen: False
- neftune_noise_alpha: None
- optim_target_modules: None
- batch_sampler: batch_sampler
- multi_dataset_batch_sampler: round_robin
Training Logs
Epoch | Step | Training Loss | sts-dev_spearman_cosine | sts-test_spearman_cosine |
---|---|---|---|---|
0.5650 | 500 | 10.9426 | - | - |
1.0 | 885 | - | 0.9202 | - |
1.1299 | 1000 | 9.7184 | - | - |
1.6949 | 1500 | 9.5348 | - | - |
2.0 | 1770 | - | 0.9400 | - |
2.2599 | 2000 | 9.4412 | - | - |
2.8249 | 2500 | 9.3097 | - | - |
3.0 | 2655 | - | 0.9489 | - |
3.3898 | 3000 | 9.2357 | - | - |
3.9548 | 3500 | 9.1594 | - | - |
4.0 | 3540 | - | 0.9528 | - |
4.5198 | 4000 | 9.0963 | - | - |
5.0 | 4425 | - | 0.9553 | - |
5.0847 | 4500 | 9.0382 | - | - |
5.6497 | 5000 | 8.9837 | - | - |
6.0 | 5310 | - | 0.9567 | - |
6.2147 | 5500 | 8.9403 | - | - |
6.7797 | 6000 | 8.8841 | - | - |
7.0 | 6195 | - | 0.9581 | - |
7.3446 | 6500 | 8.8513 | - | - |
7.9096 | 7000 | 8.81 | - | - |
8.0 | 7080 | - | 0.9582 | - |
8.4746 | 7500 | 8.8069 | - | - |
9.0 | 7965 | - | 0.9589 | - |
9.0395 | 8000 | 8.7616 | - | - |
9.6045 | 8500 | 8.7521 | - | - |
10.0 | 8850 | - | 0.9593 | 0.6266 |
Framework Versions
- Python: 3.9.7
- Sentence Transformers: 3.0.0
- Transformers: 4.40.1
- PyTorch: 2.3.0+cu121
- Accelerate: 0.29.3
- Datasets: 2.19.0
- Tokenizers: 0.19.1
Citation
BibTeX
Sentence Transformers
@inproceedings{reimers-2019-sentence-bert,
title = "Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks",
author = "Reimers, Nils and Gurevych, Iryna",
booktitle = "Proceedings of the 2019 Conference on Empirical Methods in Natural Language Processing",
month = "11",
year = "2019",
publisher = "Association for Computational Linguistics",
url = "https://arxiv.org/abs/1908.10084",
}
AnglELoss
@misc{li2023angleoptimized,
title={AnglE-optimized Text Embeddings},
author={Xianming Li and Jing Li},
year={2023},
eprint={2309.12871},
archivePrefix={arXiv},
primaryClass={cs.CL}
}