Muennighoff committed on
Commit
ac3fdf5
·
1 Parent(s): 842d3bc

Rename BTM

Browse files
Files changed (1) hide show
  1. app.py +14 -8
app.py CHANGED
@@ -54,7 +54,7 @@ TASK_LIST_CLASSIFICATION_NB = [
54
  "NorwegianParliament",
55
  "MassiveIntentClassification (nb)",
56
  "MassiveScenarioClassification (nb)",
57
- "ScalaNbClassification (nb)",
58
  ]
59
 
60
  TASK_LIST_CLASSIFICATION_SV = [
@@ -62,7 +62,6 @@ TASK_LIST_CLASSIFICATION_SV = [
62
  "MassiveIntentClassification (sv)",
63
  "MassiveScenarioClassification (sv)",
64
  "NordicLangClassification",
65
- "ScalaNbClassification",
66
  "ScalaSvClassification",
67
  "SweRecClassification",
68
  ]
@@ -587,6 +586,15 @@ def get_dim_seq_size(model):
587
  size = round(size["metadata"]["total_size"] / 1e9, 2)
588
  return dim, seq, size
589
 
 
 
 
 
 
 
 
 
 
590
  def add_rank(df):
591
  cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
592
  if len(cols_to_rank) == 1:
@@ -659,8 +667,6 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
659
  df = pd.DataFrame(df_list)
660
  # If there are any models that are the same, merge them
661
  # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
662
- # Save to csv
663
- df.to_csv("mteb.csv", index=False)
664
  df = df.groupby("Model", as_index=False).first()
665
  # Put 'Model' column first
666
  cols = sorted(list(df.columns))
@@ -780,7 +786,7 @@ with block:
780
  with gr.TabItem("English-X"):
781
  with gr.Row():
782
  gr.Markdown("""
783
- **Bitext Mining Leaderboard 🏴󠁧󠁒󠁳󠁣󠁴󠁿**
784
 
785
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
786
  - **Languages:** 117 (Pairs of: English & other language)
@@ -801,13 +807,13 @@ with block:
801
  inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
802
  outputs=data_bitext_mining,
803
  )
804
- with gr.TabItem("Other"):
805
  with gr.Row():
806
  gr.Markdown("""
807
- **Bitext Mining Other Leaderboard 🎌**
808
 
809
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
810
- - **Languages:** 2 (Pair of: Danish & Bornholmsk)
811
  - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
812
  """)
813
  with gr.Row():
 
54
  "NorwegianParliament",
55
  "MassiveIntentClassification (nb)",
56
  "MassiveScenarioClassification (nb)",
57
+ "ScalaNbClassification",
58
  ]
59
 
60
  TASK_LIST_CLASSIFICATION_SV = [
 
62
  "MassiveIntentClassification (sv)",
63
  "MassiveScenarioClassification (sv)",
64
  "NordicLangClassification",
 
65
  "ScalaSvClassification",
66
  "SweRecClassification",
67
  ]
 
586
  size = round(size["metadata"]["total_size"] / 1e9, 2)
587
  return dim, seq, size
588
 
589
+ def make_datasets_clickable(df):
590
+ """Does not work"""
591
+ if "BornholmBitextMining" in df.columns:
592
+ link = "https://huggingface.co/datasets/strombergnlp/bornholmsk_parallel"
593
+ df = df.rename(
594
+ columns={f'BornholmBitextMining': '<a target="_blank" style="text-decoration: underline" href="{link}">BornholmBitextMining</a>',})
595
+ return df
596
+
597
+
598
  def add_rank(df):
599
  cols_to_rank = [col for col in df.columns if col not in ["Model", "Model Size (GB)", "Embedding Dimensions", "Sequence Length"]]
600
  if len(cols_to_rank) == 1:
 
667
  df = pd.DataFrame(df_list)
668
  # If there are any models that are the same, merge them
669
  # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
 
 
670
  df = df.groupby("Model", as_index=False).first()
671
  # Put 'Model' column first
672
  cols = sorted(list(df.columns))
 
786
  with gr.TabItem("English-X"):
787
  with gr.Row():
788
  gr.Markdown("""
789
+ **Bitext Mining Leaderboard 🎌**
790
 
791
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
792
  - **Languages:** 117 (Pairs of: English & other language)
 
807
  inputs=[task_bitext_mining, lang_bitext_mining_other, datasets_bitext_mining_other],
808
  outputs=data_bitext_mining,
809
  )
810
+ with gr.TabItem("Danish"):
811
  with gr.Row():
812
  gr.Markdown("""
813
+ **Bitext Mining Danish Leaderboard 🇩🇰🎌**
814
 
815
  - **Metric:** [F1](https://huggingface.co/spaces/evaluate-metric/f1)
816
+ - **Languages:** Danish & Bornholmsk (Danish Dialect)
817
  - **Credits:** [Kenneth Enevoldsen](https://github.com/KennethEnevoldsen)
818
  """)
819
  with gr.Row():