Spaces:
Running
on
CPU Upgrade
Running
on
CPU Upgrade
Muennighoff
committed on
Law tab & Google Gecko (#90)
Browse files
- Add law & gecko (75488a8a46d09bf484baa94f4fab0c63704a044b)
- Upload EXTERNAL_MODEL_RESULTS.json (89464054d60c4e429fac163285e61c865ec35da9)
- Update app.py (ea7ebb3bfcbaccc8398de8f5ea8b8f2aadf5e58d)
- Update app.py (27cdb589018666de9affba50f68209d5f4090759)
- Upload EXTERNAL_MODEL_RESULTS.json (91da484e2609d16f209bf38a7493ce861e1354da)
- EXTERNAL_MODEL_RESULTS.json +0 -0
- app.py +58 -3
EXTERNAL_MODEL_RESULTS.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
app.py
CHANGED
@@ -212,6 +212,17 @@ TASK_LIST_RETRIEVAL_FR = [
|
|
212 |
"XPQARetrieval (fr)",
|
213 |
]
|
214 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
215 |
TASK_LIST_RETRIEVAL_PL = [
|
216 |
"ArguAna-PL",
|
217 |
"DBPedia-PL",
|
@@ -321,6 +332,7 @@ def make_clickable_model(model_name, link=None):
|
|
321 |
# Models without metadata, thus we cannot fetch their results naturally
|
322 |
EXTERNAL_MODELS = [
|
323 |
"Baichuan-text-embedding",
|
|
|
324 |
"Cohere-embed-multilingual-v3.0",
|
325 |
"Cohere-embed-multilingual-light-v3.0",
|
326 |
"DanskBERT",
|
@@ -339,6 +351,7 @@ EXTERNAL_MODELS = [
|
|
339 |
"bert-base-swedish-cased",
|
340 |
"bert-base-uncased",
|
341 |
"bge-base-zh-v1.5",
|
|
|
342 |
"bge-large-zh-v1.5",
|
343 |
"bge-large-zh-noinstruct",
|
344 |
"bge-small-zh-v1.5",
|
@@ -361,6 +374,8 @@ EXTERNAL_MODELS = [
|
|
361 |
"gelectra-base",
|
362 |
"gelectra-large",
|
363 |
"glove.6B.300d",
|
|
|
|
|
364 |
"gottbert-base",
|
365 |
"gtr-t5-base",
|
366 |
"gtr-t5-large",
|
@@ -431,6 +446,7 @@ EXTERNAL_MODELS = [
|
|
431 |
]
|
432 |
|
433 |
EXTERNAL_MODEL_TO_LINK = {
|
|
|
434 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
435 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
436 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
@@ -447,6 +463,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
447 |
"bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
|
448 |
"bert-base-uncased": "https://huggingface.co/bert-base-uncased",
|
449 |
"bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
|
|
|
450 |
"bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
|
451 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
452 |
"bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
|
@@ -477,6 +494,8 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
477 |
"gelectra-base": "https://huggingface.co/deepset/gelectra-base",
|
478 |
"gelectra-large": "https://huggingface.co/deepset/gelectra-large",
|
479 |
"glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d",
|
|
|
|
|
480 |
"gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
|
481 |
"gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base",
|
482 |
"gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
|
@@ -550,6 +569,7 @@ EXTERNAL_MODEL_TO_LINK = {
|
|
550 |
}
|
551 |
|
552 |
EXTERNAL_MODEL_TO_DIM = {
|
|
|
553 |
"Cohere-embed-multilingual-v3.0": 1024,
|
554 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
555 |
"all-MiniLM-L12-v2": 384,
|
@@ -565,6 +585,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
565 |
"bert-base-swedish-cased": 768,
|
566 |
"bert-base-uncased": 768,
|
567 |
"bge-base-zh-v1.5": 768,
|
|
|
568 |
"bge-large-zh-v1.5": 1024,
|
569 |
"bge-large-zh-noinstruct": 1024,
|
570 |
"bge-small-zh-v1.5": 512,
|
@@ -598,6 +619,8 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
598 |
"gelectra-base": 768,
|
599 |
"gelectra-large": 1024,
|
600 |
"glove.6B.300d": 300,
|
|
|
|
|
601 |
"gottbert-base": 768,
|
602 |
"gtr-t5-base": 768,
|
603 |
"gtr-t5-large": 768,
|
@@ -668,6 +691,7 @@ EXTERNAL_MODEL_TO_DIM = {
|
|
668 |
}
|
669 |
|
670 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
|
|
671 |
"Cohere-embed-multilingual-v3.0": 512,
|
672 |
"Cohere-embed-multilingual-light-v3.0": 512,
|
673 |
"all-MiniLM-L12-v2": 512,
|
@@ -683,6 +707,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
683 |
"bert-base-swedish-cased": 512,
|
684 |
"bert-base-uncased": 512,
|
685 |
"bge-base-zh-v1.5": 512,
|
|
|
686 |
"bge-large-zh-v1.5": 512,
|
687 |
"bge-large-zh-noinstruct": 512,
|
688 |
"bge-small-zh-v1.5": 512,
|
@@ -712,6 +737,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
|
|
712 |
"gbert-large": 512,
|
713 |
"gelectra-base": 512,
|
714 |
"gelectra-large": 512,
|
|
|
|
|
715 |
"gottbert-base": 512,
|
716 |
"glove.6B.300d": "N/A",
|
717 |
"gtr-t5-base": 512,
|
@@ -798,6 +825,7 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
798 |
"bert-base-uncased": 0.44,
|
799 |
"bert-base-swedish-cased": 0.50,
|
800 |
"bge-base-zh-v1.5": 0.41,
|
|
|
801 |
"bge-large-zh-v1.5": 1.30,
|
802 |
"bge-large-zh-noinstruct": 1.30,
|
803 |
"bge-small-zh-v1.5": 0.10,
|
@@ -828,6 +856,8 @@ EXTERNAL_MODEL_TO_SIZE = {
|
|
828 |
"gelectra-base": 0.44,
|
829 |
"gelectra-large": 1.34,
|
830 |
"glove.6B.300d": 0.48,
|
|
|
|
|
831 |
"gottbert-base": 0.51,
|
832 |
"gtr-t5-base": 0.22,
|
833 |
"gtr-t5-large": 0.67,
|
@@ -1018,7 +1048,7 @@ def add_task(examples):
|
|
1018 |
examples["mteb_task"] = "PairClassification"
|
1019 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH):
|
1020 |
examples["mteb_task"] = "Reranking"
|
1021 |
-
elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH):
|
1022 |
examples["mteb_task"] = "Retrieval"
|
1023 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH):
|
1024 |
examples["mteb_task"] = "STS"
|
@@ -1452,6 +1482,7 @@ get_mteb_average()
|
|
1452 |
get_mteb_average_fr()
|
1453 |
get_mteb_average_pl()
|
1454 |
get_mteb_average_zh()
|
|
|
1455 |
DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
|
1456 |
DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
|
1457 |
DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
|
@@ -1460,6 +1491,7 @@ DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIF
|
|
1460 |
DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
|
1461 |
DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
|
1462 |
DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
|
|
|
1463 |
|
1464 |
# Exact, add all non-nan integer values for every dataset
|
1465 |
NUM_SCORES = 0
|
@@ -1493,6 +1525,7 @@ for d in [
|
|
1493 |
DATA_RETRIEVAL_FR,
|
1494 |
DATA_RETRIEVAL_PL,
|
1495 |
DATA_RETRIEVAL_ZH,
|
|
|
1496 |
DATA_STS_EN,
|
1497 |
DATA_STS_FR,
|
1498 |
DATA_STS_PL,
|
@@ -1593,7 +1626,7 @@ with block:
|
|
1593 |
)
|
1594 |
with gr.Row():
|
1595 |
data_overall_fr = gr.Button("Refresh")
|
1596 |
-
data_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
|
1597 |
with gr.TabItem("Polish"):
|
1598 |
with gr.Row():
|
1599 |
gr.Markdown("""
|
@@ -2139,7 +2172,29 @@ with block:
|
|
2139 |
data_run_retrieval_fr.click(
|
2140 |
partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
|
2141 |
outputs=data_retrieval_fr,
|
2142 |
-
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
2143 |
with gr.TabItem("Polish"):
|
2144 |
with gr.Row():
|
2145 |
gr.Markdown("""
|
|
|
212 |
"XPQARetrieval (fr)",
|
213 |
]
|
214 |
|
215 |
+
TASK_LIST_RETRIEVAL_LAW = [
|
216 |
+
"AILACasedocs",
|
217 |
+
"AILAStatutes",
|
218 |
+
"GerDaLIRSmall",
|
219 |
+
"LeCaRDv2",
|
220 |
+
"LegalBenchConsumerContractsQA",
|
221 |
+
"LegalBenchCorporateLobbying",
|
222 |
+
"LegalQuAD",
|
223 |
+
"LegalSummarization",
|
224 |
+
]
|
225 |
+
|
226 |
TASK_LIST_RETRIEVAL_PL = [
|
227 |
"ArguAna-PL",
|
228 |
"DBPedia-PL",
|
|
|
332 |
# Models without metadata, thus we cannot fetch their results naturally
|
333 |
EXTERNAL_MODELS = [
|
334 |
"Baichuan-text-embedding",
|
335 |
+
"Cohere-embed-english-v3.0",
|
336 |
"Cohere-embed-multilingual-v3.0",
|
337 |
"Cohere-embed-multilingual-light-v3.0",
|
338 |
"DanskBERT",
|
|
|
351 |
"bert-base-swedish-cased",
|
352 |
"bert-base-uncased",
|
353 |
"bge-base-zh-v1.5",
|
354 |
+
"bge-large-en-v1.5",
|
355 |
"bge-large-zh-v1.5",
|
356 |
"bge-large-zh-noinstruct",
|
357 |
"bge-small-zh-v1.5",
|
|
|
374 |
"gelectra-base",
|
375 |
"gelectra-large",
|
376 |
"glove.6B.300d",
|
377 |
+
"google-gecko.text-embedding-preview-0409",
|
378 |
+
"google-gecko-256.text-embedding-preview-0409",
|
379 |
"gottbert-base",
|
380 |
"gtr-t5-base",
|
381 |
"gtr-t5-large",
|
|
|
446 |
]
|
447 |
|
448 |
EXTERNAL_MODEL_TO_LINK = {
|
449 |
+
"Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
|
450 |
"Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
|
451 |
"Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
|
452 |
"allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
|
|
|
463 |
"bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
|
464 |
"bert-base-uncased": "https://huggingface.co/bert-base-uncased",
|
465 |
"bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
|
466 |
+
"bge-large-en-v1.5": "https://huggingface.co/BAAI/bge-large-en-v1.5",
|
467 |
"bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
|
468 |
"bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
|
469 |
"bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
|
|
|
494 |
"gelectra-base": "https://huggingface.co/deepset/gelectra-base",
|
495 |
"gelectra-large": "https://huggingface.co/deepset/gelectra-large",
|
496 |
"glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d",
|
497 |
+
"google-gecko.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
|
498 |
+
"google-gecko-256.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
|
499 |
"gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
|
500 |
"gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base",
|
501 |
"gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
|
|
|
569 |
}
|
570 |
|
571 |
EXTERNAL_MODEL_TO_DIM = {
|
572 |
+
"Cohere-embed-english-v3.0": 1024,
|
573 |
"Cohere-embed-multilingual-v3.0": 1024,
|
574 |
"Cohere-embed-multilingual-light-v3.0": 384,
|
575 |
"all-MiniLM-L12-v2": 384,
|
|
|
585 |
"bert-base-swedish-cased": 768,
|
586 |
"bert-base-uncased": 768,
|
587 |
"bge-base-zh-v1.5": 768,
|
588 |
+
"bge-large-en-v1.5": 1024,
|
589 |
"bge-large-zh-v1.5": 1024,
|
590 |
"bge-large-zh-noinstruct": 1024,
|
591 |
"bge-small-zh-v1.5": 512,
|
|
|
619 |
"gelectra-base": 768,
|
620 |
"gelectra-large": 1024,
|
621 |
"glove.6B.300d": 300,
|
622 |
+
"google-gecko.text-embedding-preview-0409": 768,
|
623 |
+
"google-gecko-256.text-embedding-preview-0409": 256,
|
624 |
"gottbert-base": 768,
|
625 |
"gtr-t5-base": 768,
|
626 |
"gtr-t5-large": 768,
|
|
|
691 |
}
|
692 |
|
693 |
EXTERNAL_MODEL_TO_SEQLEN = {
|
694 |
+
"Cohere-embed-english-v3.0": 512,
|
695 |
"Cohere-embed-multilingual-v3.0": 512,
|
696 |
"Cohere-embed-multilingual-light-v3.0": 512,
|
697 |
"all-MiniLM-L12-v2": 512,
|
|
|
707 |
"bert-base-swedish-cased": 512,
|
708 |
"bert-base-uncased": 512,
|
709 |
"bge-base-zh-v1.5": 512,
|
710 |
+
"bge-large-en-v1.5": 512,
|
711 |
"bge-large-zh-v1.5": 512,
|
712 |
"bge-large-zh-noinstruct": 512,
|
713 |
"bge-small-zh-v1.5": 512,
|
|
|
737 |
"gbert-large": 512,
|
738 |
"gelectra-base": 512,
|
739 |
"gelectra-large": 512,
|
740 |
+
"google-gecko.text-embedding-preview-0409": 2048,
|
741 |
+
"google-gecko-256.text-embedding-preview-0409": 2048,
|
742 |
"gottbert-base": 512,
|
743 |
"glove.6B.300d": "N/A",
|
744 |
"gtr-t5-base": 512,
|
|
|
825 |
"bert-base-uncased": 0.44,
|
826 |
"bert-base-swedish-cased": 0.50,
|
827 |
"bge-base-zh-v1.5": 0.41,
|
828 |
+
"bge-large-en-v1.5": 1.30,
|
829 |
"bge-large-zh-v1.5": 1.30,
|
830 |
"bge-large-zh-noinstruct": 1.30,
|
831 |
"bge-small-zh-v1.5": 0.10,
|
|
|
856 |
"gelectra-base": 0.44,
|
857 |
"gelectra-large": 1.34,
|
858 |
"glove.6B.300d": 0.48,
|
859 |
+
"google-gecko.text-embedding-preview-0409": 2.29,
|
860 |
+
"google-gecko-256.text-embedding-preview-0409": 2.29,
|
861 |
"gottbert-base": 0.51,
|
862 |
"gtr-t5-base": 0.22,
|
863 |
"gtr-t5-large": 0.67,
|
|
|
1048 |
examples["mteb_task"] = "PairClassification"
|
1049 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH):
|
1050 |
examples["mteb_task"] = "Reranking"
|
1051 |
+
elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_RETRIEVAL_LAW):
|
1052 |
examples["mteb_task"] = "Retrieval"
|
1053 |
elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH):
|
1054 |
examples["mteb_task"] = "STS"
|
|
|
1482 |
get_mteb_average_fr()
|
1483 |
get_mteb_average_pl()
|
1484 |
get_mteb_average_zh()
|
1485 |
+
|
1486 |
DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
|
1487 |
DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
|
1488 |
DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
|
|
|
1491 |
DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
|
1492 |
DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
|
1493 |
DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
|
1494 |
+
DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)
|
1495 |
|
1496 |
# Exact, add all non-nan integer values for every dataset
|
1497 |
NUM_SCORES = 0
|
|
|
1525 |
DATA_RETRIEVAL_FR,
|
1526 |
DATA_RETRIEVAL_PL,
|
1527 |
DATA_RETRIEVAL_ZH,
|
1528 |
+
DATA_RETRIEVAL_LAW,
|
1529 |
DATA_STS_EN,
|
1530 |
DATA_STS_FR,
|
1531 |
DATA_STS_PL,
|
|
|
1626 |
)
|
1627 |
with gr.Row():
|
1628 |
data_overall_fr = gr.Button("Refresh")
|
1629 |
+
data_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
|
1630 |
with gr.TabItem("Polish"):
|
1631 |
with gr.Row():
|
1632 |
gr.Markdown("""
|
|
|
2172 |
data_run_retrieval_fr.click(
|
2173 |
partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
|
2174 |
outputs=data_retrieval_fr,
|
2175 |
+
)
|
2176 |
+
with gr.TabItem("Law"):
|
2177 |
+
with gr.Row():
|
2178 |
+
gr.Markdown("""
|
2179 |
+
**Retrieval Law Leaderboard** 🔎⚖️
|
2180 |
+
|
2181 |
+
- **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
|
2182 |
+
- **Languages:** English, German, Chinese
|
2183 |
+
- **Credits:** [Voyage AI](https://www.voyageai.com/)
|
2184 |
+
""")
|
2185 |
+
with gr.Row():
|
2186 |
+
data_retrieval_law = gr.components.Dataframe(
|
2187 |
+
DATA_RETRIEVAL_LAW,
|
2188 |
+
# Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
|
2189 |
+
datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_LAW.columns) * 2,
|
2190 |
+
type="pandas",
|
2191 |
+
)
|
2192 |
+
with gr.Row():
|
2193 |
+
data_run_retrieval_law = gr.Button("Refresh")
|
2194 |
+
data_run_retrieval_law.click(
|
2195 |
+
partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_LAW),
|
2196 |
+
outputs=data_retrieval_law,
|
2197 |
+
)
|
2198 |
with gr.TabItem("Polish"):
|
2199 |
with gr.Row():
|
2200 |
gr.Markdown("""
|