Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Merge branch 'main' into model_size_parameters
Browse files- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +7 -1
EXTERNAL_MODEL_RESULTS.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -442,6 +442,7 @@ EXTERNAL_MODELS = [
|
|
442 |
"use-cmlm-multilingual",
|
443 |
"voyage-2",
|
444 |
"voyage-code-2",
|
|
|
445 |
"voyage-lite-01-instruct",
|
446 |
"voyage-lite-02-instruct",
|
447 |
"xlm-roberta-base",
|
@@ -565,6 +566,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
565 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
566 |
"voyage-2": "https://docs.voyageai.com/embeddings/",
|
567 |
"voyage-code-2": "https://docs.voyageai.com/embeddings/",
|
|
|
568 |
"voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
|
569 |
"voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
|
570 |
"xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
|
@@ -687,6 +689,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
687 |
"use-cmlm-multilingual": 768,
|
688 |
"voyage-2": 1024,
|
689 |
"voyage-code-2": 1536,
|
|
|
690 |
"voyage-lite-01-instruct": 1024,
|
691 |
"voyage-lite-02-instruct": 1024,
|
692 |
"xlm-roberta-base": 768,
|
@@ -809,6 +812,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
809 |
"unsup-simcse-bert-base-uncased": 512,
|
810 |
"voyage-2": 1024,
|
811 |
"voyage-code-2": 16000,
|
|
|
812 |
"voyage-lite-01-instruct": 4000,
|
813 |
"voyage-lite-02-instruct": 4000,
|
814 |
"xlm-roberta-base": 514,
|
@@ -901,6 +905,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
901 |
"text2vec-large-chinese": 326,
|
902 |
"unsup-simcse-bert-base-uncased": 110,
|
903 |
"use-cmlm-multilingual": 472,
|
|
|
904 |
"voyage-lite-02-instruct": 1220,
|
905 |
"xlm-roberta-base": 279,
|
906 |
"xlm-roberta-large": 560,
|
@@ -930,6 +935,7 @@ PROPRIETARY_MODELS = {
|
|
930 |
"titan-embed-text-v1",
|
931 |
"voyage-2",
|
932 |
"voyage-code-2",
|
|
|
933 |
"voyage-lite-01-instruct",
|
934 |
"voyage-lite-02-instruct",
|
935 |
"google-gecko.text-embedding-preview-0409",
|
@@ -2094,7 +2100,7 @@ with gr.Blocks(css=css) as block:
|
|
2094 |
language_per_task = gr.JSON(value=dict(), visible=False)
|
2095 |
|
2096 |
gr.Markdown(f"""
|
2097 |
-
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb
|
2098 |
""")
|
2099 |
|
2100 |
with gr.Row():
|
|
|
442 |
"use-cmlm-multilingual",
|
443 |
"voyage-2",
|
444 |
"voyage-code-2",
|
445 |
+
"voyage-law-2",
|
446 |
"voyage-lite-01-instruct",
|
447 |
"voyage-lite-02-instruct",
|
448 |
"xlm-roberta-base",
|
|
|
566 |
"use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
|
567 |
"voyage-2": "https://docs.voyageai.com/embeddings/",
|
568 |
"voyage-code-2": "https://docs.voyageai.com/embeddings/",
|
569 |
+
"voyage-law-2": "https://docs.voyageai.com/embeddings/",
|
570 |
"voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
|
571 |
"voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
|
572 |
"xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
|
|
|
689 |
"use-cmlm-multilingual": 768,
|
690 |
"voyage-2": 1024,
|
691 |
"voyage-code-2": 1536,
|
692 |
+
"voyage-law-2": 1024,
|
693 |
"voyage-lite-01-instruct": 1024,
|
694 |
"voyage-lite-02-instruct": 1024,
|
695 |
"xlm-roberta-base": 768,
|
|
|
812 |
"unsup-simcse-bert-base-uncased": 512,
|
813 |
"voyage-2": 1024,
|
814 |
"voyage-code-2": 16000,
|
815 |
+
"voyage-law-2": 4000,
|
816 |
"voyage-lite-01-instruct": 4000,
|
817 |
"voyage-lite-02-instruct": 4000,
|
818 |
"xlm-roberta-base": 514,
|
|
|
905 |
"text2vec-large-chinese": 326,
|
906 |
"unsup-simcse-bert-base-uncased": 110,
|
907 |
"use-cmlm-multilingual": 472,
|
908 |
+
"voyage-law-2": 1220,
|
909 |
"voyage-lite-02-instruct": 1220,
|
910 |
"xlm-roberta-base": 279,
|
911 |
"xlm-roberta-large": 560,
|
|
|
935 |
"titan-embed-text-v1",
|
936 |
"voyage-2",
|
937 |
"voyage-code-2",
|
938 |
+
"voyage-law-2",
|
939 |
"voyage-lite-01-instruct",
|
940 |
"voyage-lite-02-instruct",
|
941 |
"google-gecko.text-embedding-preview-0409",
|
|
|
2100 |
language_per_task = gr.JSON(value=dict(), visible=False)
|
2101 |
|
2102 |
gr.Markdown(f"""
|
2103 |
+
Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb/blob/main/docs/adding_a_model.md" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
|
2104 |
""")
|
2105 |
|
2106 |
with gr.Row():
|