Muennighoff committed on
Commit
7287938
·
verified ·
1 Parent(s): 5c90ee9

Law tab & Google Gecko (#90)

Browse files

- Add law & gecko (75488a8a46d09bf484baa94f4fab0c63704a044b)
- Upload EXTERNAL_MODEL_RESULTS.json (89464054d60c4e429fac163285e61c865ec35da9)
- Update app.py (ea7ebb3bfcbaccc8398de8f5ea8b8f2aadf5e58d)
- Update app.py (27cdb589018666de9affba50f68209d5f4090759)
- Upload EXTERNAL_MODEL_RESULTS.json (91da484e2609d16f209bf38a7493ce861e1354da)

Files changed (2) hide show
  1. EXTERNAL_MODEL_RESULTS.json +0 -0
  2. app.py +58 -3
EXTERNAL_MODEL_RESULTS.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -212,6 +212,17 @@ TASK_LIST_RETRIEVAL_FR = [
212
  "XPQARetrieval (fr)",
213
  ]
214
 
 
 
 
 
 
 
 
 
 
 
 
215
  TASK_LIST_RETRIEVAL_PL = [
216
  "ArguAna-PL",
217
  "DBPedia-PL",
@@ -321,6 +332,7 @@ def make_clickable_model(model_name, link=None):
321
  # Models without metadata, thus we cannot fetch their results naturally
322
  EXTERNAL_MODELS = [
323
  "Baichuan-text-embedding",
 
324
  "Cohere-embed-multilingual-v3.0",
325
  "Cohere-embed-multilingual-light-v3.0",
326
  "DanskBERT",
@@ -339,6 +351,7 @@ EXTERNAL_MODELS = [
339
  "bert-base-swedish-cased",
340
  "bert-base-uncased",
341
  "bge-base-zh-v1.5",
 
342
  "bge-large-zh-v1.5",
343
  "bge-large-zh-noinstruct",
344
  "bge-small-zh-v1.5",
@@ -361,6 +374,8 @@ EXTERNAL_MODELS = [
361
  "gelectra-base",
362
  "gelectra-large",
363
  "glove.6B.300d",
 
 
364
  "gottbert-base",
365
  "gtr-t5-base",
366
  "gtr-t5-large",
@@ -431,6 +446,7 @@ EXTERNAL_MODELS = [
431
  ]
432
 
433
  EXTERNAL_MODEL_TO_LINK = {
 
434
  "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
435
  "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
436
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
@@ -447,6 +463,7 @@ EXTERNAL_MODEL_TO_LINK = {
447
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
448
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
449
  "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
 
450
  "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
451
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
452
  "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
@@ -477,6 +494,8 @@ EXTERNAL_MODEL_TO_LINK = {
477
  "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
478
  "gelectra-large": "https://huggingface.co/deepset/gelectra-large",
479
  "glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d",
 
 
480
  "gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
481
  "gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base",
482
  "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
@@ -550,6 +569,7 @@ EXTERNAL_MODEL_TO_LINK = {
550
  }
551
 
552
  EXTERNAL_MODEL_TO_DIM = {
 
553
  "Cohere-embed-multilingual-v3.0": 1024,
554
  "Cohere-embed-multilingual-light-v3.0": 384,
555
  "all-MiniLM-L12-v2": 384,
@@ -565,6 +585,7 @@ EXTERNAL_MODEL_TO_DIM = {
565
  "bert-base-swedish-cased": 768,
566
  "bert-base-uncased": 768,
567
  "bge-base-zh-v1.5": 768,
 
568
  "bge-large-zh-v1.5": 1024,
569
  "bge-large-zh-noinstruct": 1024,
570
  "bge-small-zh-v1.5": 512,
@@ -598,6 +619,8 @@ EXTERNAL_MODEL_TO_DIM = {
598
  "gelectra-base": 768,
599
  "gelectra-large": 1024,
600
  "glove.6B.300d": 300,
 
 
601
  "gottbert-base": 768,
602
  "gtr-t5-base": 768,
603
  "gtr-t5-large": 768,
@@ -668,6 +691,7 @@ EXTERNAL_MODEL_TO_DIM = {
668
  }
669
 
670
  EXTERNAL_MODEL_TO_SEQLEN = {
 
671
  "Cohere-embed-multilingual-v3.0": 512,
672
  "Cohere-embed-multilingual-light-v3.0": 512,
673
  "all-MiniLM-L12-v2": 512,
@@ -683,6 +707,7 @@ EXTERNAL_MODEL_TO_SEQLEN = {
683
  "bert-base-swedish-cased": 512,
684
  "bert-base-uncased": 512,
685
  "bge-base-zh-v1.5": 512,
 
686
  "bge-large-zh-v1.5": 512,
687
  "bge-large-zh-noinstruct": 512,
688
  "bge-small-zh-v1.5": 512,
@@ -712,6 +737,8 @@ EXTERNAL_MODEL_TO_SEQLEN = {
712
  "gbert-large": 512,
713
  "gelectra-base": 512,
714
  "gelectra-large": 512,
 
 
715
  "gottbert-base": 512,
716
  "glove.6B.300d": "N/A",
717
  "gtr-t5-base": 512,
@@ -798,6 +825,7 @@ EXTERNAL_MODEL_TO_SIZE = {
798
  "bert-base-uncased": 0.44,
799
  "bert-base-swedish-cased": 0.50,
800
  "bge-base-zh-v1.5": 0.41,
 
801
  "bge-large-zh-v1.5": 1.30,
802
  "bge-large-zh-noinstruct": 1.30,
803
  "bge-small-zh-v1.5": 0.10,
@@ -828,6 +856,8 @@ EXTERNAL_MODEL_TO_SIZE = {
828
  "gelectra-base": 0.44,
829
  "gelectra-large": 1.34,
830
  "glove.6B.300d": 0.48,
 
 
831
  "gottbert-base": 0.51,
832
  "gtr-t5-base": 0.22,
833
  "gtr-t5-large": 0.67,
@@ -1018,7 +1048,7 @@ def add_task(examples):
1018
  examples["mteb_task"] = "PairClassification"
1019
  elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH):
1020
  examples["mteb_task"] = "Reranking"
1021
- elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH):
1022
  examples["mteb_task"] = "Retrieval"
1023
  elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH):
1024
  examples["mteb_task"] = "STS"
@@ -1452,6 +1482,7 @@ get_mteb_average()
1452
  get_mteb_average_fr()
1453
  get_mteb_average_pl()
1454
  get_mteb_average_zh()
 
1455
  DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
1456
  DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
1457
  DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
@@ -1460,6 +1491,7 @@ DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIF
1460
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
1461
  DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
1462
  DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
 
1463
 
1464
  # Exact, add all non-nan integer values for every dataset
1465
  NUM_SCORES = 0
@@ -1493,6 +1525,7 @@ for d in [
1493
  DATA_RETRIEVAL_FR,
1494
  DATA_RETRIEVAL_PL,
1495
  DATA_RETRIEVAL_ZH,
 
1496
  DATA_STS_EN,
1497
  DATA_STS_FR,
1498
  DATA_STS_PL,
@@ -1593,7 +1626,7 @@ with block:
1593
  )
1594
  with gr.Row():
1595
  data_overall_fr = gr.Button("Refresh")
1596
- data_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
1597
  with gr.TabItem("Polish"):
1598
  with gr.Row():
1599
  gr.Markdown("""
@@ -2139,7 +2172,29 @@ with block:
2139
  data_run_retrieval_fr.click(
2140
  partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
2141
  outputs=data_retrieval_fr,
2142
- )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2143
  with gr.TabItem("Polish"):
2144
  with gr.Row():
2145
  gr.Markdown("""
 
212
  "XPQARetrieval (fr)",
213
  ]
214
 
215
+ TASK_LIST_RETRIEVAL_LAW = [
216
+ "AILACasedocs",
217
+ "AILAStatutes",
218
+ "GerDaLIRSmall",
219
+ "LeCaRDv2",
220
+ "LegalBenchConsumerContractsQA",
221
+ "LegalBenchCorporateLobbying",
222
+ "LegalQuAD",
223
+ "LegalSummarization",
224
+ ]
225
+
226
  TASK_LIST_RETRIEVAL_PL = [
227
  "ArguAna-PL",
228
  "DBPedia-PL",
 
332
  # Models without metadata, thus we cannot fetch their results naturally
333
  EXTERNAL_MODELS = [
334
  "Baichuan-text-embedding",
335
+ "Cohere-embed-english-v3.0",
336
  "Cohere-embed-multilingual-v3.0",
337
  "Cohere-embed-multilingual-light-v3.0",
338
  "DanskBERT",
 
351
  "bert-base-swedish-cased",
352
  "bert-base-uncased",
353
  "bge-base-zh-v1.5",
354
+ "bge-large-en-v1.5",
355
  "bge-large-zh-v1.5",
356
  "bge-large-zh-noinstruct",
357
  "bge-small-zh-v1.5",
 
374
  "gelectra-base",
375
  "gelectra-large",
376
  "glove.6B.300d",
377
+ "google-gecko.text-embedding-preview-0409",
378
+ "google-gecko-256.text-embedding-preview-0409",
379
  "gottbert-base",
380
  "gtr-t5-base",
381
  "gtr-t5-large",
 
446
  ]
447
 
448
  EXTERNAL_MODEL_TO_LINK = {
449
+ "Cohere-embed-english-v3.0": "https://huggingface.co/Cohere/Cohere-embed-english-v3.0",
450
  "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
451
  "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
452
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
 
463
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
464
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
465
  "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
466
+ "bge-large-en-v1.5": "https://huggingface.co/BAAI/bge-large-en-v1.5",
467
  "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
468
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
469
  "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
 
494
  "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
495
  "gelectra-large": "https://huggingface.co/deepset/gelectra-large",
496
  "glove.6B.300d": "https://huggingface.co/sentence-transformers/average_word_embeddings_glove.6B.300d",
497
+ "google-gecko.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
498
+ "google-gecko-256.text-embedding-preview-0409": "https://cloud.google.com/vertex-ai/generative-ai/docs/embeddings/get-text-embeddings#latest_models",
499
  "gottbert-base": "https://huggingface.co/uklfr/gottbert-base",
500
  "gtr-t5-base": "https://huggingface.co/sentence-transformers/gtr-t5-base",
501
  "gtr-t5-large": "https://huggingface.co/sentence-transformers/gtr-t5-large",
 
569
  }
570
 
571
  EXTERNAL_MODEL_TO_DIM = {
572
+ "Cohere-embed-english-v3.0": 1024,
573
  "Cohere-embed-multilingual-v3.0": 1024,
574
  "Cohere-embed-multilingual-light-v3.0": 384,
575
  "all-MiniLM-L12-v2": 384,
 
585
  "bert-base-swedish-cased": 768,
586
  "bert-base-uncased": 768,
587
  "bge-base-zh-v1.5": 768,
588
+ "bge-large-en-v1.5": 1024,
589
  "bge-large-zh-v1.5": 1024,
590
  "bge-large-zh-noinstruct": 1024,
591
  "bge-small-zh-v1.5": 512,
 
619
  "gelectra-base": 768,
620
  "gelectra-large": 1024,
621
  "glove.6B.300d": 300,
622
+ "google-gecko.text-embedding-preview-0409": 768,
623
+ "google-gecko-256.text-embedding-preview-0409": 256,
624
  "gottbert-base": 768,
625
  "gtr-t5-base": 768,
626
  "gtr-t5-large": 768,
 
691
  }
692
 
693
  EXTERNAL_MODEL_TO_SEQLEN = {
694
+ "Cohere-embed-english-v3.0": 512,
695
  "Cohere-embed-multilingual-v3.0": 512,
696
  "Cohere-embed-multilingual-light-v3.0": 512,
697
  "all-MiniLM-L12-v2": 512,
 
707
  "bert-base-swedish-cased": 512,
708
  "bert-base-uncased": 512,
709
  "bge-base-zh-v1.5": 512,
710
+ "bge-large-en-v1.5": 512,
711
  "bge-large-zh-v1.5": 512,
712
  "bge-large-zh-noinstruct": 512,
713
  "bge-small-zh-v1.5": 512,
 
737
  "gbert-large": 512,
738
  "gelectra-base": 512,
739
  "gelectra-large": 512,
740
+ "google-gecko.text-embedding-preview-0409": 2048,
741
+ "google-gecko-256.text-embedding-preview-0409": 2048,
742
  "gottbert-base": 512,
743
  "glove.6B.300d": "N/A",
744
  "gtr-t5-base": 512,
 
825
  "bert-base-uncased": 0.44,
826
  "bert-base-swedish-cased": 0.50,
827
  "bge-base-zh-v1.5": 0.41,
828
+ "bge-large-en-v1.5": 1.30,
829
  "bge-large-zh-v1.5": 1.30,
830
  "bge-large-zh-noinstruct": 1.30,
831
  "bge-small-zh-v1.5": 0.10,
 
856
  "gelectra-base": 0.44,
857
  "gelectra-large": 1.34,
858
  "glove.6B.300d": 0.48,
859
+ "google-gecko.text-embedding-preview-0409": 2.29,
860
+ "google-gecko-256.text-embedding-preview-0409": 2.29,
861
  "gottbert-base": 0.51,
862
  "gtr-t5-base": 0.22,
863
  "gtr-t5-large": 0.67,
 
1048
  examples["mteb_task"] = "PairClassification"
1049
  elif examples["mteb_dataset_name"] in norm(TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH):
1050
  examples["mteb_task"] = "Reranking"
1051
+ elif examples["mteb_dataset_name"] in norm(TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_RETRIEVAL_LAW):
1052
  examples["mteb_task"] = "Retrieval"
1053
  elif examples["mteb_dataset_name"] in norm(TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH):
1054
  examples["mteb_task"] = "STS"
 
1482
  get_mteb_average_fr()
1483
  get_mteb_average_pl()
1484
  get_mteb_average_zh()
1485
+
1486
  DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
1487
  DATA_BITEXT_MINING_OTHER = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_OTHER)
1488
  DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
 
1491
  DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
1492
  DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
1493
  DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
1494
+ DATA_RETRIEVAL_LAW = get_mteb_data(["Retrieval"], [], TASK_LIST_RETRIEVAL_LAW)
1495
 
1496
  # Exact, add all non-nan integer values for every dataset
1497
  NUM_SCORES = 0
 
1525
  DATA_RETRIEVAL_FR,
1526
  DATA_RETRIEVAL_PL,
1527
  DATA_RETRIEVAL_ZH,
1528
+ DATA_RETRIEVAL_LAW,
1529
  DATA_STS_EN,
1530
  DATA_STS_FR,
1531
  DATA_STS_PL,
 
1626
  )
1627
  with gr.Row():
1628
  data_overall_fr = gr.Button("Refresh")
1629
+ data_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
1630
  with gr.TabItem("Polish"):
1631
  with gr.Row():
1632
  gr.Markdown("""
 
2172
  data_run_retrieval_fr.click(
2173
  partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
2174
  outputs=data_retrieval_fr,
2175
+ )
2176
+ with gr.TabItem("Law"):
2177
+ with gr.Row():
2178
+ gr.Markdown("""
2179
+ **Retrieval Law Leaderboard** 🔎⚖️
2180
+
2181
+ - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
2182
+ - **Languages:** English, German, Chinese
2183
+ - **Credits:** [Voyage AI](https://www.voyageai.com/)
2184
+ """)
2185
+ with gr.Row():
2186
+ data_retrieval_law = gr.components.Dataframe(
2187
+ DATA_RETRIEVAL_LAW,
2188
+ # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
2189
+ datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_LAW.columns) * 2,
2190
+ type="pandas",
2191
+ )
2192
+ with gr.Row():
2193
+ data_run_retrieval_law = gr.Button("Refresh")
2194
+ data_run_retrieval_law.click(
2195
+ partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_LAW),
2196
+ outputs=data_retrieval_law,
2197
+ )
2198
  with gr.TabItem("Polish"):
2199
  with gr.Row():
2200
  gr.Markdown("""