binwang commited on
Commit
b9f1bdb
·
verified ·
1 Parent(s): eba6af5

Upload folder using huggingface_hub

Browse files
Files changed (34) hide show
  1. results/cross_lingual/zero_shot/cross_logiqa.csv +1 -1
  2. results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv +8 -1
  3. results/cross_lingual/zero_shot/cross_mmlu.csv +1 -1
  4. results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv +8 -1
  5. results/cross_lingual/zero_shot/cross_xquad.csv +1 -1
  6. results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv +8 -1
  7. results/cultural_reasoning/zero_shot/cn_eval.csv +1 -1
  8. results/cultural_reasoning/zero_shot/ph_eval.csv +1 -1
  9. results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv +8 -1
  10. results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv +1 -1
  11. results/cultural_reasoning/zero_shot/us_eval.csv +1 -1
  12. results/dialogue/zero_shot/dialogsum.csv +1 -1
  13. results/dialogue/zero_shot/dream.csv +1 -1
  14. results/dialogue/zero_shot/samsum.csv +1 -1
  15. results/emotion/zero_shot/ind_emotion.csv +1 -1
  16. results/emotion/zero_shot/sst2.csv +1 -1
  17. results/flores_translation/zero_shot/ind2eng.csv +2 -1
  18. results/flores_translation/zero_shot/vie2eng.csv +2 -1
  19. results/flores_translation/zero_shot/zho2eng.csv +2 -1
  20. results/flores_translation/zero_shot/zsm2eng.csv +2 -1
  21. results/fundamental_nlp_tasks/zero_shot/c3.csv +2 -1
  22. results/fundamental_nlp_tasks/zero_shot/cola.csv +1 -1
  23. results/fundamental_nlp_tasks/zero_shot/mnli.csv +1 -1
  24. results/fundamental_nlp_tasks/zero_shot/mrpc.csv +1 -1
  25. results/fundamental_nlp_tasks/zero_shot/ocnli.csv +1 -1
  26. results/fundamental_nlp_tasks/zero_shot/qnli.csv +1 -1
  27. results/fundamental_nlp_tasks/zero_shot/qqp.csv +1 -1
  28. results/fundamental_nlp_tasks/zero_shot/rte.csv +1 -1
  29. results/fundamental_nlp_tasks/zero_shot/wnli.csv +1 -1
  30. results/general_reasoning/zero_shot/c_eval.csv +1 -1
  31. results/general_reasoning/zero_shot/cmmlu_no_prompt.csv +12 -0
  32. results/general_reasoning/zero_shot/indommlu_no_prompt.csv +5 -1
  33. results/general_reasoning/zero_shot/mmlu_no_prompt.csv +5 -1
  34. results/general_reasoning/zero_shot/zbench.csv +1 -1
results/cross_lingual/zero_shot/cross_logiqa.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.46834415584415584,0.348538961038961,0.3996561615557665,0
8
  Qwen2-72B-Instruct,0.6728896103896104,0.6762987012987012,0.6745898487968579,0.75,0.8068181818181818,0.6534090909090909,0.6193181818181818,0.625,0.6534090909090909,0.6022727272727273
9
  Sailor2-8B-Chat,0.5405844155844156,0.5628246753246753,0.551480408610067,0.625,0.5852272727272727,0.4943181818181818,0.5568181818181818,0.5056818181818182,0.5568181818181818,0.4602272727272727
10
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
11
- MERaLiON-Llama-3-8B-Instruct,0.4829545454545454,0.4952922077922078,0.48904557518459746,0.5397727272727273,0.5340909090909091,0.4772727272727273,0.5056818181818182,0.4602272727272727,0.45454545454545453,0.4090909090909091
12
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
13
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
14
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
15
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
 
16
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
17
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
18
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
 
8
  Qwen2-72B-Instruct,0.6728896103896104,0.6762987012987012,0.6745898487968579,0.75,0.8068181818181818,0.6534090909090909,0.6193181818181818,0.625,0.6534090909090909,0.6022727272727273
9
  Sailor2-8B-Chat,0.5405844155844156,0.5628246753246753,0.551480408610067,0.625,0.5852272727272727,0.4943181818181818,0.5568181818181818,0.5056818181818182,0.5568181818181818,0.4602272727272727
10
  Meta-Llama-3-8B-Instruct,0.4610389610389611,0.45097402597402597,0.4559509553669637,0.5965909090909091,0.48295454545454547,0.5,0.4602272727272727,0.42045454545454547,0.4034090909090909,0.36363636363636365
 
11
  Meta-Llama-3.1-70B-Instruct,0.6566558441558442,0.598051948051948,0.6259852839118454,0.7443181818181818,0.7215909090909091,0.6647727272727273,0.6534090909090909,0.6193181818181818,0.625,0.5681818181818182
12
  Qwen2_5_3B_Instruct,0.4878246753246754,0.3594155844155844,0.41388918606681485,0.6079545454545454,0.6420454545454546,0.45454545454545453,0.4602272727272727,0.48295454545454547,0.42045454545454547,0.3465909090909091
13
  SeaLLMs-v3-7B-Chat,0.5551948051948051,0.5142857142857142,0.5339578453833284,0.6022727272727273,0.6647727272727273,0.5738636363636364,0.5454545454545454,0.5170454545454546,0.5,0.48295454545454547
14
  Qwen2_5_72B_Instruct,0.7248376623376623,0.6852272727272727,0.7044761161663122,0.8011363636363636,0.7954545454545454,0.7272727272727273,0.6704545454545454,0.7159090909090909,0.7159090909090909,0.6477272727272727
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.4829545454545454,0.4952922077922078,0.48904557518459746,0.5397727272727273,0.5340909090909091,0.4772727272727273,0.5056818181818182,0.4602272727272727,0.45454545454545453,0.4090909090909091
16
  gemma-2-9b-it,0.6185064935064934,0.5592532467532466,0.5873893507784849,0.6647727272727273,0.6761363636363636,0.5625,0.6193181818181818,0.5795454545454546,0.6420454545454546,0.5852272727272727
17
  Meta-Llama-3-70B-Instruct,0.6306818181818182,0.6186688311688312,0.6246175698800746,0.7102272727272727,0.6875,0.6420454545454546,0.6193181818181818,0.6022727272727273,0.6136363636363636,0.5397727272727273
18
  Qwen2_5_14B_Instruct,0.6436688311688312,0.5938311688311688,0.6177464473895627,0.75,0.7386363636363636,0.6306818181818182,0.6420454545454546,0.6136363636363636,0.5965909090909091,0.5340909090909091
results/cross_lingual/zero_shot/cross_logiqa_no_prompt.csv CHANGED
@@ -1,10 +1,17 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
 
2
  Meta-Llama-3.1-8B-Instruct,0.512987012987013,0.4394480519480519,0.4733785048611023,0.5852272727272727,0.5852272727272727,0.5454545454545454,0.5,0.45454545454545453,0.5227272727272727,0.3977272727272727
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.45779220779220786,0.3751623376623376,0.412378792469608,0.5284090909090909,0.5170454545454546,0.5340909090909091,0.4602272727272727,0.4034090909090909,0.4431818181818182,0.3181818181818182
4
  Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.6931818181818182,0.7102272727272727,0.6420454545454546,0.5795454545454546,0.6306818181818182,0.5340909090909091,0.4431818181818182
 
5
  Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
6
  Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
7
- MERaLiON-Llama-3-8B-Instruct,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
8
  SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
 
9
  gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
 
10
  gemma2-9b-cpt-sea-lionv3-instruct,0.5844155844155844,0.605844155844156,0.5949369778657175,0.6363636363636364,0.6420454545454546,0.5625,0.5681818181818182,0.5568181818181818,0.5511363636363636,0.5738636363636364
 
 
 
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.5470779220779222,0.4477272727272727,0.4924415499148947,0.6079545454545454,0.6704545454545454,0.5397727272727273,0.5284090909090909,0.5397727272727273,0.5340909090909091,0.4090909090909091
3
  Meta-Llama-3.1-8B-Instruct,0.512987012987013,0.4394480519480519,0.4733785048611023,0.5852272727272727,0.5852272727272727,0.5454545454545454,0.5,0.45454545454545453,0.5227272727272727,0.3977272727272727
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.45779220779220786,0.3751623376623376,0.412378792469608,0.5284090909090909,0.5170454545454546,0.5340909090909091,0.4602272727272727,0.4034090909090909,0.4431818181818182,0.3181818181818182
5
  Qwen2_5_7B_Instruct,0.6047077922077922,0.47938311688311697,0.5348014705675028,0.6931818181818182,0.7102272727272727,0.6420454545454546,0.5795454545454546,0.6306818181818182,0.5340909090909091,0.4431818181818182
6
+ Qwen2_5_1_5B_Instruct,0.44642857142857134,0.27094155844155837,0.33722076741815865,0.48295454545454547,0.5454545454545454,0.44886363636363635,0.4659090909090909,0.4772727272727273,0.36363636363636365,0.3409090909090909
7
  Sailor2-8B-Chat,0.5503246753246753,0.5363636363636365,0.5432544747850031,0.6136363636363636,0.625,0.5056818181818182,0.5625,0.5113636363636364,0.5511363636363636,0.48295454545454547
8
  Meta-Llama-3-8B-Instruct,0.5,0.4426948051948052,0.46960564830561785,0.6022727272727273,0.5227272727272727,0.5454545454545454,0.5056818181818182,0.4375,0.48295454545454547,0.4034090909090909
9
+ Qwen2_5_3B_Instruct,0.5097402597402597,0.3623376623376624,0.42358163053231446,0.6363636363636364,0.6136363636363636,0.4659090909090909,0.4602272727272727,0.5056818181818182,0.48863636363636365,0.3977272727272727
10
  SeaLLMs-v3-7B-Chat,0.5324675324675324,0.41266233766233773,0.46497164802104307,0.5681818181818182,0.5852272727272727,0.5738636363636364,0.5568181818181818,0.4943181818181818,0.5170454545454546,0.4318181818181818
11
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5105519480519481,0.4558441558441559,0.48164954476113636,0.5909090909090909,0.5284090909090909,0.5454545454545454,0.5,0.4943181818181818,0.48863636363636365,0.42613636363636365
12
  gemma-2-9b-it,0.6006493506493508,0.5753246753246755,0.587714328691409,0.6590909090909091,0.6363636363636364,0.5511363636363636,0.6022727272727273,0.5852272727272727,0.6022727272727273,0.5681818181818182
13
+ Qwen2_5_14B_Instruct,0.6420454545454545,0.5673701298701299,0.6024023794498856,0.7443181818181818,0.7215909090909091,0.625,0.6477272727272727,0.6306818181818182,0.6193181818181818,0.5056818181818182
14
  gemma2-9b-cpt-sea-lionv3-instruct,0.5844155844155844,0.605844155844156,0.5949369778657175,0.6363636363636364,0.6420454545454546,0.5625,0.5681818181818182,0.5568181818181818,0.5511363636363636,0.5738636363636364
15
+ gemma-2-2b-it,0.48295454545454547,0.46590909090909094,0.4742787152466955,0.5170454545454546,0.5284090909090909,0.5170454545454546,0.4602272727272727,0.45454545454545453,0.48863636363636365,0.4147727272727273
16
+ llama3-8b-cpt-sea-lionv2-instruct,0.45373376623376627,0.37159090909090914,0.408574583313631,0.5397727272727273,0.4943181818181818,0.5340909090909091,0.45454545454545453,0.3977272727272727,0.4318181818181818,0.32386363636363635
17
+ Qwen2_5_0_5B_Instruct,0.3319805194805195,0.16087662337662337,0.216727730394231,0.35795454545454547,0.3806818181818182,0.32386363636363635,0.3125,0.3352272727272727,0.3068181818181818,0.3068181818181818
results/cross_lingual/zero_shot/cross_mmlu.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5076190476190475,0.3721904761904762,0.42948154099799957,
8
  Qwen2-72B-Instruct,0.779047619047619,0.7611428571428573,0.7699911663398871,0.8133333333333334,0.7933333333333333,0.7933333333333333,0.7333333333333333,0.7666666666666667,0.78,0.7733333333333333
9
  Sailor2-8B-Chat,0.6542857142857142,0.6586666666666667,0.6564688814239598,0.7133333333333334,0.6733333333333333,0.6533333333333333,0.6066666666666667,0.62,0.6466666666666666,0.6666666666666666
10
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
11
- MERaLiON-Llama-3-8B-Instruct,0.5980952380952381,0.5817142857142859,0.5897910419722433,0.76,0.5866666666666667,0.6266666666666667,0.5466666666666666,0.5666666666666667,0.5533333333333333,0.5466666666666666
12
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
13
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
14
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
15
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
 
16
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
17
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
18
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
 
8
  Qwen2-72B-Instruct,0.779047619047619,0.7611428571428573,0.7699911663398871,0.8133333333333334,0.7933333333333333,0.7933333333333333,0.7333333333333333,0.7666666666666667,0.78,0.7733333333333333
9
  Sailor2-8B-Chat,0.6542857142857142,0.6586666666666667,0.6564688814239598,0.7133333333333334,0.6733333333333333,0.6533333333333333,0.6066666666666667,0.62,0.6466666666666666,0.6666666666666666
10
  Meta-Llama-3-8B-Instruct,0.5733333333333334,0.4742857142857144,0.5191272726777197,0.7133333333333334,0.5866666666666667,0.5733333333333334,0.5866666666666667,0.5066666666666667,0.5333333333333333,0.5133333333333333
 
11
  Meta-Llama-3.1-70B-Instruct,0.7638095238095238,0.7716190476190474,0.7676944251955988,0.8,0.74,0.7666666666666667,0.7666666666666667,0.76,0.7666666666666667,0.7466666666666667
12
  Qwen2_5_3B_Instruct,0.5857142857142856,0.48952380952380964,0.533316462053399,0.6933333333333334,0.6666666666666666,0.64,0.5266666666666666,0.6333333333333333,0.5466666666666666,0.3933333333333333
13
  SeaLLMs-v3-7B-Chat,0.6628571428571429,0.6135238095238095,0.6372370860992635,0.74,0.6933333333333334,0.6933333333333334,0.6466666666666666,0.68,0.6,0.5866666666666667
14
  Qwen2_5_72B_Instruct,0.8123809523809525,0.8140952380952383,0.8132371917701643,0.8533333333333334,0.8333333333333334,0.84,0.7933333333333333,0.8066666666666666,0.7733333333333333,0.7866666666666666
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5980952380952381,0.5817142857142859,0.5897910419722433,0.76,0.5866666666666667,0.6266666666666667,0.5466666666666666,0.5666666666666667,0.5533333333333333,0.5466666666666666
16
  gemma-2-9b-it,0.7161904761904762,0.7163809523809525,0.7162857015727578,0.7733333333333333,0.74,0.7066666666666667,0.64,0.7266666666666667,0.6933333333333334,0.7333333333333333
17
  Meta-Llama-3-70B-Instruct,0.758095238095238,0.7316190476190477,0.7446218665971989,0.7933333333333333,0.7466666666666667,0.7733333333333333,0.7466666666666667,0.7733333333333333,0.7333333333333333,0.74
18
  Qwen2_5_14B_Instruct,0.7266666666666666,0.680952380952381,0.7030672078887086,0.78,0.7533333333333333,0.7533333333333333,0.7266666666666667,0.7466666666666667,0.68,0.6466666666666666
results/cross_lingual/zero_shot/cross_mmlu_no_prompt.csv CHANGED
@@ -1,11 +1,18 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
 
2
  Meta-Llama-3.1-8B-Instruct,0.6876190476190477,0.5615238095238096,0.6182070607559236,0.82,0.6333333333333333,0.72,0.6666666666666666,0.66,0.6466666666666666,0.6666666666666666
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.6676190476190476,0.5590476190476189,0.6085285418019147,0.7533333333333333,0.6666666666666666,0.68,0.6333333333333333,0.6933333333333334,0.64,0.6066666666666667
4
  Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8466666666666667,0.84,0.8266666666666667,0.74,0.7533333333333333,0.7133333333333334,0.7
 
5
  Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
6
  Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
7
- MERaLiON-Llama-3-8B-Instruct,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
8
  SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
 
9
  gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
 
10
  gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
 
 
 
11
  GPT4o_0513,0.8819047619047619,0.8609523809523807,0.8713026281050943,0.9266666666666666,0.8866666666666667,0.9066666666666666,0.7933333333333333,0.88,0.9066666666666666,0.8733333333333333
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.7133333333333333,0.5862857142857144,0.6435995895738107,0.8333333333333334,0.7533333333333333,0.7666666666666667,0.64,0.7066666666666667,0.6933333333333334,0.6
3
  Meta-Llama-3.1-8B-Instruct,0.6876190476190477,0.5615238095238096,0.6182070607559236,0.82,0.6333333333333333,0.72,0.6666666666666666,0.66,0.6466666666666666,0.6666666666666666
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.6676190476190476,0.5590476190476189,0.6085285418019147,0.7533333333333333,0.6666666666666666,0.68,0.6333333333333333,0.6933333333333334,0.64,0.6066666666666667
5
  Qwen2_5_7B_Instruct,0.7742857142857141,0.6222857142857142,0.6900140284752591,0.8466666666666667,0.84,0.8266666666666667,0.74,0.7533333333333333,0.7133333333333334,0.7
6
+ Qwen2_5_1_5B_Instruct,0.5285714285714286,0.32666666666666666,0.403786191489535,0.7,0.6333333333333333,0.5333333333333333,0.4666666666666667,0.5,0.44,0.4266666666666667
7
  Sailor2-8B-Chat,0.6923809523809524,0.6592380952380954,0.6754031781322388,0.7266666666666667,0.7066666666666667,0.7133333333333334,0.6733333333333333,0.6733333333333333,0.6466666666666666,0.7066666666666667
8
  Meta-Llama-3-8B-Instruct,0.6647619047619048,0.5036190476190476,0.5730780815259353,0.7733333333333333,0.66,0.6666666666666666,0.66,0.6266666666666667,0.64,0.6266666666666667
9
+ Qwen2_5_3B_Instruct,0.6676190476190477,0.45619047619047626,0.5420161420018103,0.8333333333333334,0.74,0.7,0.62,0.68,0.62,0.48
10
  SeaLLMs-v3-7B-Chat,0.7342857142857142,0.5765714285714287,0.6459409639562039,0.8333333333333334,0.7266666666666667,0.7866666666666666,0.7133333333333334,0.74,0.6866666666666666,0.6533333333333333
11
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6980952380952381,0.5891428571428572,0.6390081595918414,0.8466666666666667,0.6933333333333334,0.6933333333333334,0.6933333333333334,0.7133333333333334,0.6133333333333333,0.6333333333333333
12
  gemma-2-9b-it,0.781904761904762,0.747047619047619,0.7640788528690432,0.84,0.7933333333333333,0.7866666666666666,0.7466666666666667,0.78,0.7466666666666667,0.78
13
+ Qwen2_5_14B_Instruct,0.8142857142857143,0.7396190476190475,0.7751584771679209,0.8733333333333333,0.8533333333333334,0.8133333333333334,0.8333333333333334,0.84,0.7666666666666667,0.72
14
  gemma2-9b-cpt-sea-lionv3-instruct,0.7809523809523808,0.7506666666666667,0.7655100940510849,0.8466666666666667,0.7866666666666666,0.7733333333333333,0.78,0.7933333333333333,0.7333333333333333,0.7533333333333333
15
+ gemma-2-2b-it,0.6514285714285714,0.5255238095238095,0.5817418444469077,0.76,0.6533333333333333,0.7,0.6066666666666667,0.64,0.5866666666666667,0.6133333333333333
16
+ llama3-8b-cpt-sea-lionv2-instruct,0.6685714285714285,0.5620952380952383,0.6107272204160255,0.76,0.66,0.6733333333333333,0.6533333333333333,0.6866666666666666,0.64,0.6066666666666667
17
+ Qwen2_5_0_5B_Instruct,0.41904761904761906,0.17276190476190473,0.24465799189698598,0.5666666666666667,0.4666666666666667,0.4066666666666667,0.4066666666666667,0.37333333333333335,0.34,0.37333333333333335
18
  GPT4o_0513,0.8819047619047619,0.8609523809523807,0.8713026281050943,0.9266666666666666,0.8866666666666667,0.9066666666666666,0.7933333333333333,0.88,0.9066666666666666,0.8733333333333333
results/cross_lingual/zero_shot/cross_xquad.csv CHANGED
@@ -6,11 +6,11 @@ Qwen2_5_7B_Instruct,0.9460084033613445,0.9178571428571428,0.9317201790045005,0.9
6
  Qwen2_5_1_5B_Instruct,0.8939075630252101,0.8308823529411764,0.8612434620121144,0.9100840336134454,0.9,0.8957983193277311,0.8697478991596639,,,
7
  Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
9
- MERaLiON-Llama-3-8B-Instruct,0.9369747899159664,0.8936974789915966,0.9148245940061492,0.9470588235294117,0.9218487394957983,0.9403361344537815,0.938655462184874,,,
10
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
11
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
12
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
13
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
 
14
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
15
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
16
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
 
6
  Qwen2_5_1_5B_Instruct,0.8939075630252101,0.8308823529411764,0.8612434620121144,0.9100840336134454,0.9,0.8957983193277311,0.8697478991596639,,,
7
  Qwen2-72B-Instruct,0.9613445378151261,0.9516806722689075,0.956488195931227,0.9638655462184874,0.9596638655462185,0.9596638655462185,0.9621848739495799,,,
8
  Meta-Llama-3-8B-Instruct,0.9210084033613445,0.880672268907563,0.9003888121913395,0.9411764705882353,0.9033613445378151,0.9260504201680673,0.9134453781512605,,,
 
9
  Meta-Llama-3.1-70B-Instruct,0.9615546218487395,0.9512605042016806,0.9563798632627071,0.9647058823529412,0.9512605042016806,0.9647058823529412,0.965546218487395,,,
10
  Qwen2_5_3B_Instruct,0.9378151260504202,0.8924369747899159,0.9145635113049859,0.9504201680672268,0.9327731092436975,0.9378151260504202,0.9302521008403362,,,
11
  SeaLLMs-v3-7B-Chat,0.9403361344537815,0.917016806722689,0.9285300818164836,0.9537815126050421,0.9378151260504202,0.9394957983193277,0.9302521008403362,,,
12
  Qwen2_5_72B_Instruct,0.9682773109243697,0.9632352941176471,0.9657497216354985,0.9714285714285714,0.9638655462184874,0.9680672268907563,0.9697478991596639,,,
13
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.9369747899159664,0.8936974789915966,0.9148245940061492,0.9470588235294117,0.9218487394957983,0.9403361344537815,0.938655462184874,,,
14
  gemma-2-9b-it,0.9567226890756303,0.9350840336134454,0.9457796088507574,0.9663865546218487,0.9411764705882353,0.9588235294117647,0.9605042016806723,,,
15
  Meta-Llama-3-70B-Instruct,0.9592436974789916,0.9422268907563025,0.9506591499208973,0.9714285714285714,0.9403361344537815,0.9596638655462185,0.965546218487395,,,
16
  Qwen2_5_14B_Instruct,0.9581932773109244,0.9474789915966386,0.9528060148705768,0.965546218487395,0.9529411764705882,0.9571428571428572,0.9571428571428572,,,
results/cross_lingual/zero_shot/cross_xquad_no_prompt.csv CHANGED
@@ -1,11 +1,18 @@
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
 
2
  Meta-Llama-3.1-8B-Instruct,0.9168067226890756,0.8292016806722688,0.870806433460842,0.9436974789915966,0.8949579831932774,0.9201680672268907,0.9084033613445378,,,
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.928781512605042,0.8592436974789917,0.892660412722869,0.9470588235294117,0.9084033613445378,0.9352941176470588,0.9243697478991597,,,
4
  Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9210084033613445,0.8991596638655462,0.9092436974789916,0.8983193277310925,,,
 
5
  Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
6
  Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
7
- MERaLiON-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
8
  SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
 
9
  gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
 
10
  gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
 
 
 
11
  GPT4o_0513,0.8941176470588236,0.8014705882352942,0.8452629967360276,0.9302521008403362,0.8857142857142857,0.9168067226890756,0.8436974789915966,,,
 
1
  Model,Accuracy,Cross-Lingual Consistency,AC3,English,Chinese,Spanish,Vietnamese,Indonesian,Malay,Filipino
2
+ Qwen2-7B-Instruct,0.8949579831932772,0.8128151260504202,0.8519110436147214,0.915126050420168,0.8781512605042017,0.9126050420168067,0.8739495798319328,,,
3
  Meta-Llama-3.1-8B-Instruct,0.9168067226890756,0.8292016806722688,0.870806433460842,0.9436974789915966,0.8949579831932774,0.9201680672268907,0.9084033613445378,,,
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.928781512605042,0.8592436974789917,0.892660412722869,0.9470588235294117,0.9084033613445378,0.9352941176470588,0.9243697478991597,,,
5
  Qwen2_5_7B_Instruct,0.9069327731092437,0.8264705882352941,0.8648342089942876,0.9210084033613445,0.8991596638655462,0.9092436974789916,0.8983193277310925,,,
6
+ Qwen2_5_1_5B_Instruct,0.8439075630252101,0.6844537815126049,0.7558627739261137,0.8890756302521008,0.8369747899159664,0.8588235294117647,0.7907563025210084,,,
7
  Sailor2-8B-Chat,0.9086134453781513,0.8378151260504201,0.8717792421413649,0.9252100840336135,0.8949579831932774,0.9117647058823529,0.9025210084033614,,,
8
  Meta-Llama-3-8B-Instruct,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9310924369747899,0.8848739495798319,0.9277310924369748,0.9033613445378151,,,
9
+ Qwen2_5_3B_Instruct,0.8859243697478991,0.7831932773109245,0.8313973694706849,0.9058823529411765,0.8739495798319328,0.9008403361344538,0.8630252100840337,,,
10
  SeaLLMs-v3-7B-Chat,0.8943277310924369,0.7991596638655463,0.8440696412045011,0.9210084033613445,0.8773109243697479,0.9,0.8789915966386554,,,
11
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.9117647058823529,0.8266806722689075,0.8671405721911006,0.9302521008403362,0.8899159663865546,0.9210084033613445,0.9058823529411765,,,
12
  gemma-2-9b-it,0.8668067226890757,0.7012605042016806,0.7752949732453414,0.8773109243697479,0.8529411764705882,0.8714285714285714,0.865546218487395,,,
13
+ Qwen2_5_14B_Instruct,0.9084033613445378,0.8453781512605042,0.8757582956018183,0.9159663865546218,0.8983193277310925,0.915126050420168,0.9042016806722689,,,
14
  gemma2-9b-cpt-sea-lionv3-instruct,0.9315126050420168,0.8716386554621849,0.9005815677746684,0.9453781512605042,0.9142857142857143,0.9369747899159664,0.9294117647058824,,,
15
+ gemma-2-2b-it,0.8873949579831932,0.7871848739495798,0.8342915336505994,0.9184873949579831,0.8638655462184874,0.9016806722689076,0.865546218487395,,,
16
+ llama3-8b-cpt-sea-lionv2-instruct,0.9296218487394958,0.8630252100840337,0.8950865005059928,0.9445378151260504,0.9058823529411765,0.9411764705882353,0.926890756302521,,,
17
+ Qwen2_5_0_5B_Instruct,0.7186974789915966,0.4804621848739496,0.5759149034045948,0.7815126050420168,0.7142857142857143,0.7478991596638656,0.6310924369747899,,,
18
  GPT4o_0513,0.8941176470588236,0.8014705882352942,0.8452629967360276,0.9302521008403362,0.8857142857142857,0.9168067226890756,0.8436974789915966,,,
results/cultural_reasoning/zero_shot/cn_eval.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5523809523809524
8
  Qwen2-72B-Instruct,0.8285714285714286
9
  Sailor2-8B-Chat,0.7142857142857143
10
  Meta-Llama-3-8B-Instruct,0.4666666666666667
11
- MERaLiON-Llama-3-8B-Instruct,0.5142857142857142
12
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
13
  Qwen2_5_3B_Instruct,0.7142857142857143
14
  SeaLLMs-v3-7B-Chat,0.819047619047619
15
  Qwen2_5_72B_Instruct,0.8761904761904762
 
16
  gemma-2-9b-it,0.580952380952381
17
  Meta-Llama-3-70B-Instruct,0.5333333333333333
18
  Qwen2_5_14B_Instruct,0.8285714285714286
 
8
  Qwen2-72B-Instruct,0.8285714285714286
9
  Sailor2-8B-Chat,0.7142857142857143
10
  Meta-Llama-3-8B-Instruct,0.4666666666666667
 
11
  Meta-Llama-3.1-70B-Instruct,0.5428571428571428
12
  Qwen2_5_3B_Instruct,0.7142857142857143
13
  SeaLLMs-v3-7B-Chat,0.819047619047619
14
  Qwen2_5_72B_Instruct,0.8761904761904762
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5142857142857142
16
  gemma-2-9b-it,0.580952380952381
17
  Meta-Llama-3-70B-Instruct,0.5333333333333333
18
  Qwen2_5_14B_Instruct,0.8285714285714286
results/cultural_reasoning/zero_shot/ph_eval.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.37
8
  Qwen2-72B-Instruct,0.62
9
  Sailor2-8B-Chat,0.53
10
  Meta-Llama-3-8B-Instruct,0.58
11
- MERaLiON-Llama-3-8B-Instruct,0.54
12
  Meta-Llama-3.1-70B-Instruct,0.68
13
  Qwen2_5_3B_Instruct,0.4
14
  SeaLLMs-v3-7B-Chat,0.47
15
  Qwen2_5_72B_Instruct,0.72
 
16
  gemma-2-9b-it,0.58
17
  Meta-Llama-3-70B-Instruct,0.63
18
  Qwen2_5_14B_Instruct,0.6
 
8
  Qwen2-72B-Instruct,0.62
9
  Sailor2-8B-Chat,0.53
10
  Meta-Llama-3-8B-Instruct,0.58
 
11
  Meta-Llama-3.1-70B-Instruct,0.68
12
  Qwen2_5_3B_Instruct,0.4
13
  SeaLLMs-v3-7B-Chat,0.47
14
  Qwen2_5_72B_Instruct,0.72
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.54
16
  gemma-2-9b-it,0.58
17
  Meta-Llama-3-70B-Instruct,0.63
18
  Qwen2_5_14B_Instruct,0.6
results/cultural_reasoning/zero_shot/sg_eval_v2_mcq_no_prompt.csv CHANGED
@@ -1,11 +1,18 @@
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,0.7418181818181818
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.7945454545454546
4
  Qwen2_5_7B_Instruct,0.7654545454545455
 
5
  Sailor2-8B-Chat,0.7145454545454546
6
  Meta-Llama-3-8B-Instruct,0.8290909090909091
7
- MERaLiON-Llama-3-8B-Instruct,0.7854545454545454
8
  SeaLLMs-v3-7B-Chat,0.7581818181818182
 
9
  gemma-2-9b-it,0.7618181818181818
 
10
  gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
 
 
 
11
  GPT4o_0513,0.9072727272727272
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7618181818181818
3
  Meta-Llama-3.1-8B-Instruct,0.7418181818181818
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.7945454545454546
5
  Qwen2_5_7B_Instruct,0.7654545454545455
6
+ Qwen2_5_1_5B_Instruct,0.6927272727272727
7
  Sailor2-8B-Chat,0.7145454545454546
8
  Meta-Llama-3-8B-Instruct,0.8290909090909091
9
+ Qwen2_5_3B_Instruct,0.7072727272727273
10
  SeaLLMs-v3-7B-Chat,0.7581818181818182
11
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.7854545454545454
12
  gemma-2-9b-it,0.7618181818181818
13
+ Qwen2_5_14B_Instruct,0.8236363636363636
14
  gemma2-9b-cpt-sea-lionv3-instruct,0.7818181818181819
15
+ gemma-2-2b-it,0.6927272727272727
16
+ llama3-8b-cpt-sea-lionv2-instruct,0.7981818181818182
17
+ Qwen2_5_0_5B_Instruct,0.5490909090909091
18
  GPT4o_0513,0.9072727272727272
results/cultural_reasoning/zero_shot/sg_eval_v2_open.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,44.480000000000004
8
  Qwen2-72B-Instruct,54.080000000000005
9
  Sailor2-8B-Chat,54.36
10
  Meta-Llama-3-8B-Instruct,51.120000000000005
11
- MERaLiON-Llama-3-8B-Instruct,49.2
12
  Meta-Llama-3.1-70B-Instruct,51.31999999999999
13
  Qwen2_5_3B_Instruct,47.24
14
  SeaLLMs-v3-7B-Chat,55.0
15
  Qwen2_5_72B_Instruct,53.32
 
16
  gemma-2-9b-it,53.96
17
  Meta-Llama-3-70B-Instruct,50.599999999999994
18
  Qwen2_5_14B_Instruct,53.2
 
8
  Qwen2-72B-Instruct,54.080000000000005
9
  Sailor2-8B-Chat,54.36
10
  Meta-Llama-3-8B-Instruct,51.120000000000005
 
11
  Meta-Llama-3.1-70B-Instruct,51.31999999999999
12
  Qwen2_5_3B_Instruct,47.24
13
  SeaLLMs-v3-7B-Chat,55.0
14
  Qwen2_5_72B_Instruct,53.32
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,49.2
16
  gemma-2-9b-it,53.96
17
  Meta-Llama-3-70B-Instruct,50.599999999999994
18
  Qwen2_5_14B_Instruct,53.2
results/cultural_reasoning/zero_shot/us_eval.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5981308411214953
8
  Qwen2-72B-Instruct,0.8785046728971962
9
  Sailor2-8B-Chat,0.7009345794392523
10
  Meta-Llama-3-8B-Instruct,0.7009345794392523
11
- MERaLiON-Llama-3-8B-Instruct,0.7383177570093458
12
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
13
  Qwen2_5_3B_Instruct,0.6728971962616822
14
  SeaLLMs-v3-7B-Chat,0.6915887850467289
15
  Qwen2_5_72B_Instruct,0.8598130841121495
 
16
  gemma-2-9b-it,0.8130841121495327
17
  Meta-Llama-3-70B-Instruct,0.8691588785046729
18
  Qwen2_5_14B_Instruct,0.822429906542056
 
8
  Qwen2-72B-Instruct,0.8785046728971962
9
  Sailor2-8B-Chat,0.7009345794392523
10
  Meta-Llama-3-8B-Instruct,0.7009345794392523
 
11
  Meta-Llama-3.1-70B-Instruct,0.8411214953271028
12
  Qwen2_5_3B_Instruct,0.6728971962616822
13
  SeaLLMs-v3-7B-Chat,0.6915887850467289
14
  Qwen2_5_72B_Instruct,0.8598130841121495
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.7383177570093458
16
  gemma-2-9b-it,0.8130841121495327
17
  Meta-Llama-3-70B-Instruct,0.8691588785046729
18
  Qwen2_5_14B_Instruct,0.822429906542056
results/dialogue/zero_shot/dialogsum.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.20263242988485167,0.30002072253966694,0.0841667023855871
8
  Qwen2-72B-Instruct,0.2183280630214023,0.316174552903144,0.10156543495268992,0.23724420120837297
9
  Sailor2-8B-Chat,0.19777087324327317,0.2970393044008424,0.07701994204737679,0.21925337328160027
10
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
11
- MERaLiON-Llama-3-8B-Instruct,0.25236243090492,0.3573462392196718,0.125506438977953,0.27423461451713527
12
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
13
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
14
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
15
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
 
16
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
17
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
18
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
 
8
  Qwen2-72B-Instruct,0.2183280630214023,0.316174552903144,0.10156543495268992,0.23724420120837297
9
  Sailor2-8B-Chat,0.19777087324327317,0.2970393044008424,0.07701994204737679,0.21925337328160027
10
  Meta-Llama-3-8B-Instruct,0.23978455271183616,0.33971099717559883,0.1203340311564728,0.2593086298034369
 
11
  Meta-Llama-3.1-70B-Instruct,0.2526239717396146,0.35714386898604744,0.1258832921736473,0.27484475405914904
12
  Qwen2_5_3B_Instruct,0.22107390172674926,0.32206286484028823,0.10065030710901035,0.24050853323094928
13
  SeaLLMs-v3-7B-Chat,0.24891094210680076,0.35393482223136147,0.12172072639345373,0.27107727769558715
14
  Qwen2_5_72B_Instruct,0.23460549655507293,0.3373580017785426,0.10893746645433498,0.25752102143234123
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.25236243090492,0.3573462392196718,0.125506438977953,0.27423461451713527
16
  gemma-2-9b-it,0.2560682231168516,0.36247455000865003,0.12571639767749476,0.2800137216644101
17
  Meta-Llama-3-70B-Instruct,0.2557065499979308,0.36058417323628,0.12758087337786866,0.2789546033796438
18
  Qwen2_5_14B_Instruct,0.2343478938479703,0.3386251381162625,0.10742381514017992,0.2569947282874686
results/dialogue/zero_shot/dream.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.8314551690347869
8
  Qwen2-72B-Instruct,0.9612934835864773
9
  Sailor2-8B-Chat,0.9054385105340519
10
  Meta-Llama-3-8B-Instruct,0.8946594806467418
11
- MERaLiON-Llama-3-8B-Instruct,0.9103380695737384
12
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
13
  Qwen2_5_3B_Instruct,0.9029887310142087
14
  SeaLLMs-v3-7B-Chat,0.9265066144047036
15
  Qwen2_5_72B_Instruct,0.9627633512983832
 
16
  gemma-2-9b-it,0.9416952474277315
17
  Meta-Llama-3-70B-Instruct,0.9480646741793238
18
  Qwen2_5_14B_Instruct,0.9461048505634493
 
8
  Qwen2-72B-Instruct,0.9612934835864773
9
  Sailor2-8B-Chat,0.9054385105340519
10
  Meta-Llama-3-8B-Instruct,0.8946594806467418
 
11
  Meta-Llama-3.1-70B-Instruct,0.9559039686428221
12
  Qwen2_5_3B_Instruct,0.9029887310142087
13
  SeaLLMs-v3-7B-Chat,0.9265066144047036
14
  Qwen2_5_72B_Instruct,0.9627633512983832
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.9103380695737384
16
  gemma-2-9b-it,0.9416952474277315
17
  Meta-Llama-3-70B-Instruct,0.9480646741793238
18
  Qwen2_5_14B_Instruct,0.9461048505634493
results/dialogue/zero_shot/samsum.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.2333120091694482,0.34339111721032756,0.10195887716459845
8
  Qwen2-72B-Instruct,0.2800906719573321,0.3887231369098802,0.15237661526996754,0.29917226369214855
9
  Sailor2-8B-Chat,0.23525560304744508,0.34567892481583223,0.10170204161284628,0.2583858427136568
10
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
11
- MERaLiON-Llama-3-8B-Instruct,0.2827552959388026,0.3953429193664384,0.14797005050571224,0.30495291794425716
12
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
13
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
14
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
15
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
 
16
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
17
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
18
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
 
8
  Qwen2-72B-Instruct,0.2800906719573321,0.3887231369098802,0.15237661526996754,0.29917226369214855
9
  Sailor2-8B-Chat,0.23525560304744508,0.34567892481583223,0.10170204161284628,0.2583858427136568
10
  Meta-Llama-3-8B-Instruct,0.2846315092346869,0.39397110152251813,0.154320846916639,0.30560257926490364
 
11
  Meta-Llama-3.1-70B-Instruct,0.28934874612070227,0.4036295731242805,0.15211190810296196,0.31230475713486433
12
  Qwen2_5_3B_Instruct,0.26935624341081515,0.380865832002109,0.13872106416227833,0.28848183406805816
13
  SeaLLMs-v3-7B-Chat,0.2959981719045788,0.4078820748825196,0.16338306782652476,0.316729373004692
14
  Qwen2_5_72B_Instruct,0.28852247889830335,0.3996215000271418,0.15494490129237035,0.31100103537539775
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.2827552959388026,0.3953429193664384,0.14797005050571224,0.30495291794425716
16
  gemma-2-9b-it,0.3100514077180449,0.4289412957792292,0.16727050182456474,0.3339424255503407
17
  Meta-Llama-3-70B-Instruct,0.2893525314227379,0.4030746211134018,0.15236139065578,0.3126215824990321
18
  Qwen2_5_14B_Instruct,0.2713801253928723,0.3836253496005304,0.13683087953788298,0.2936841470402035
results/emotion/zero_shot/ind_emotion.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5795454545454546
8
  Qwen2-72B-Instruct,0.675
9
  Sailor2-8B-Chat,0.7363636363636363
10
  Meta-Llama-3-8B-Instruct,0.6522727272727272
11
- MERaLiON-Llama-3-8B-Instruct,0.7
12
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
13
  Qwen2_5_3B_Instruct,0.5522727272727272
14
  SeaLLMs-v3-7B-Chat,0.6454545454545455
15
  Qwen2_5_72B_Instruct,0.7068181818181818
 
16
  gemma-2-9b-it,0.7477272727272727
17
  Meta-Llama-3-70B-Instruct,0.6909090909090909
18
  Qwen2_5_14B_Instruct,0.6954545454545454
 
8
  Qwen2-72B-Instruct,0.675
9
  Sailor2-8B-Chat,0.7363636363636363
10
  Meta-Llama-3-8B-Instruct,0.6522727272727272
 
11
  Meta-Llama-3.1-70B-Instruct,0.7159090909090909
12
  Qwen2_5_3B_Instruct,0.5522727272727272
13
  SeaLLMs-v3-7B-Chat,0.6454545454545455
14
  Qwen2_5_72B_Instruct,0.7068181818181818
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.7
16
  gemma-2-9b-it,0.7477272727272727
17
  Meta-Llama-3-70B-Instruct,0.6909090909090909
18
  Qwen2_5_14B_Instruct,0.6954545454545454
results/emotion/zero_shot/sst2.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.9231651376146789
8
  Qwen2-72B-Instruct,0.9346330275229358
9
  Sailor2-8B-Chat,0.9461009174311926
10
  Meta-Llama-3-8B-Instruct,0.8784403669724771
11
- MERaLiON-Llama-3-8B-Instruct,0.8841743119266054
12
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
13
  Qwen2_5_3B_Instruct,0.8245412844036697
14
  SeaLLMs-v3-7B-Chat,0.9403669724770642
15
  Qwen2_5_72B_Instruct,0.9334862385321101
 
16
  gemma-2-9b-it,0.9311926605504587
17
  Meta-Llama-3-70B-Instruct,0.9495412844036697
18
  Qwen2_5_14B_Instruct,0.9311926605504587
 
8
  Qwen2-72B-Instruct,0.9346330275229358
9
  Sailor2-8B-Chat,0.9461009174311926
10
  Meta-Llama-3-8B-Instruct,0.8784403669724771
 
11
  Meta-Llama-3.1-70B-Instruct,0.9529816513761468
12
  Qwen2_5_3B_Instruct,0.8245412844036697
13
  SeaLLMs-v3-7B-Chat,0.9403669724770642
14
  Qwen2_5_72B_Instruct,0.9334862385321101
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.8841743119266054
16
  gemma-2-9b-it,0.9311926605504587
17
  Meta-Llama-3-70B-Instruct,0.9495412844036697
18
  Qwen2_5_14B_Instruct,0.9311926605504587
results/flores_translation/zero_shot/ind2eng.csv CHANGED
@@ -6,12 +6,13 @@ Qwen2_5_32B_Instruct,0.3923422946746861
6
  Qwen2_5_7B_Instruct,0.36472669481333536
7
  Qwen2_5_1_5B_Instruct,0.2624938515155373
8
  Qwen2-72B-Instruct,0.4043588265556185
 
9
  Meta-Llama-3-8B-Instruct,0.33079891679041123
10
- MERaLiON-Llama-3-8B-Instruct,0.38376586000725804
11
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
12
  Qwen2_5_3B_Instruct,0.3316936422167389
13
  SeaLLMs-v3-7B-Chat,0.3594829412574955
14
  Qwen2_5_72B_Instruct,0.4215612766585066
 
15
  gemma-2-9b-it,0.40786563079141763
16
  Meta-Llama-3-70B-Instruct,0.3830092775167675
17
  Qwen2_5_14B_Instruct,0.3901044620348051
 
6
  Qwen2_5_7B_Instruct,0.36472669481333536
7
  Qwen2_5_1_5B_Instruct,0.2624938515155373
8
  Qwen2-72B-Instruct,0.4043588265556185
9
+ Sailor2-8B-Chat,0.30613567466028746
10
  Meta-Llama-3-8B-Instruct,0.33079891679041123
 
11
  Meta-Llama-3.1-70B-Instruct,0.43366494500251235
12
  Qwen2_5_3B_Instruct,0.3316936422167389
13
  SeaLLMs-v3-7B-Chat,0.3594829412574955
14
  Qwen2_5_72B_Instruct,0.4215612766585066
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.38376586000725804
16
  gemma-2-9b-it,0.40786563079141763
17
  Meta-Llama-3-70B-Instruct,0.3830092775167675
18
  Qwen2_5_14B_Instruct,0.3901044620348051
results/flores_translation/zero_shot/vie2eng.csv CHANGED
@@ -6,12 +6,13 @@ Qwen2_5_32B_Instruct,0.33791529833420336
6
  Qwen2_5_7B_Instruct,0.3027564749728372
7
  Qwen2_5_1_5B_Instruct,0.21935649300365245
8
  Qwen2-72B-Instruct,0.33005323227052946
 
9
  Meta-Llama-3-8B-Instruct,0.2647448190950291
10
- MERaLiON-Llama-3-8B-Instruct,0.30900856944791294
11
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
12
  Qwen2_5_3B_Instruct,0.27312609009801636
13
  SeaLLMs-v3-7B-Chat,0.30981028289420137
14
  Qwen2_5_72B_Instruct,0.35733464866179004
 
15
  gemma-2-9b-it,0.3367700653885
16
  Meta-Llama-3-70B-Instruct,0.3230140263371192
17
  Qwen2_5_14B_Instruct,0.32198218156960645
 
6
  Qwen2_5_7B_Instruct,0.3027564749728372
7
  Qwen2_5_1_5B_Instruct,0.21935649300365245
8
  Qwen2-72B-Instruct,0.33005323227052946
9
+ Sailor2-8B-Chat,0.2508650753772058
10
  Meta-Llama-3-8B-Instruct,0.2647448190950291
 
11
  Meta-Llama-3.1-70B-Instruct,0.37244508311079816
12
  Qwen2_5_3B_Instruct,0.27312609009801636
13
  SeaLLMs-v3-7B-Chat,0.30981028289420137
14
  Qwen2_5_72B_Instruct,0.35733464866179004
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.30900856944791294
16
  gemma-2-9b-it,0.3367700653885
17
  Meta-Llama-3-70B-Instruct,0.3230140263371192
18
  Qwen2_5_14B_Instruct,0.32198218156960645
results/flores_translation/zero_shot/zho2eng.csv CHANGED
@@ -6,12 +6,13 @@ Qwen2_5_32B_Instruct,0.26924811164378015
6
  Qwen2_5_7B_Instruct,0.2437311220019033
7
  Qwen2_5_1_5B_Instruct,0.18420680441018222
8
  Qwen2-72B-Instruct,0.23893268538329387
 
9
  Meta-Llama-3-8B-Instruct,0.199495011482748
10
- MERaLiON-Llama-3-8B-Instruct,0.24133164017585856
11
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
12
  Qwen2_5_3B_Instruct,0.2245195134637718
13
  SeaLLMs-v3-7B-Chat,0.2516593644617717
14
  Qwen2_5_72B_Instruct,0.2843491241986514
 
15
  gemma-2-9b-it,0.267527968123433
16
  Meta-Llama-3-70B-Instruct,0.24397819518058994
17
  Qwen2_5_14B_Instruct,0.2627781200417998
 
6
  Qwen2_5_7B_Instruct,0.2437311220019033
7
  Qwen2_5_1_5B_Instruct,0.18420680441018222
8
  Qwen2-72B-Instruct,0.23893268538329387
9
+ Sailor2-8B-Chat,0.18385611872796095
10
  Meta-Llama-3-8B-Instruct,0.199495011482748
 
11
  Meta-Llama-3.1-70B-Instruct,0.2832594176173152
12
  Qwen2_5_3B_Instruct,0.2245195134637718
13
  SeaLLMs-v3-7B-Chat,0.2516593644617717
14
  Qwen2_5_72B_Instruct,0.2843491241986514
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.24133164017585856
16
  gemma-2-9b-it,0.267527968123433
17
  Meta-Llama-3-70B-Instruct,0.24397819518058994
18
  Qwen2_5_14B_Instruct,0.2627781200417998
results/flores_translation/zero_shot/zsm2eng.csv CHANGED
@@ -6,12 +6,13 @@ Qwen2_5_32B_Instruct,0.40310877536446654
6
  Qwen2_5_7B_Instruct,0.3466422765302921
7
  Qwen2_5_1_5B_Instruct,0.22890805100949677
8
  Qwen2-72B-Instruct,0.40796892621611885
 
9
  Meta-Llama-3-8B-Instruct,0.31625368345049
10
- MERaLiON-Llama-3-8B-Instruct,0.3729790018011108
11
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
12
  Qwen2_5_3B_Instruct,0.31056841204320457
13
  SeaLLMs-v3-7B-Chat,0.3484133510670942
14
  Qwen2_5_72B_Instruct,0.4237666988692159
 
15
  gemma-2-9b-it,0.4234100394581857
16
  Meta-Llama-3-70B-Instruct,0.3957287030176054
17
  Qwen2_5_14B_Instruct,0.3841042767934729
 
6
  Qwen2_5_7B_Instruct,0.3466422765302921
7
  Qwen2_5_1_5B_Instruct,0.22890805100949677
8
  Qwen2-72B-Instruct,0.40796892621611885
9
+ Sailor2-8B-Chat,0.3139160319283414
10
  Meta-Llama-3-8B-Instruct,0.31625368345049
 
11
  Meta-Llama-3.1-70B-Instruct,0.4462132282683508
12
  Qwen2_5_3B_Instruct,0.31056841204320457
13
  SeaLLMs-v3-7B-Chat,0.3484133510670942
14
  Qwen2_5_72B_Instruct,0.4237666988692159
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.3729790018011108
16
  gemma-2-9b-it,0.4234100394581857
17
  Meta-Llama-3-70B-Instruct,0.3957287030176054
18
  Qwen2_5_14B_Instruct,0.3841042767934729
results/fundamental_nlp_tasks/zero_shot/c3.csv CHANGED
@@ -6,12 +6,13 @@ Qwen2_5_32B_Instruct,0.9603590127150337
6
  Qwen2_5_7B_Instruct,0.9121166791323859
7
  Qwen2_5_1_5B_Instruct,0.793941660433807
8
  Qwen2-72B-Instruct,0.9611069558713538
 
9
  Meta-Llama-3-8B-Instruct,0.8515332834704562
10
- MERaLiON-Llama-3-8B-Instruct,0.8706058339566193
11
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
12
  Qwen2_5_3B_Instruct,0.8668661181750187
13
  SeaLLMs-v3-7B-Chat,0.9143605086013463
14
  Qwen2_5_72B_Instruct,0.9596110695587136
 
15
  gemma-2-9b-it,0.9222139117427075
16
  Meta-Llama-3-70B-Instruct,0.9521316379955124
17
  Qwen2_5_14B_Instruct,0.9502617801047121
 
6
  Qwen2_5_7B_Instruct,0.9121166791323859
7
  Qwen2_5_1_5B_Instruct,0.793941660433807
8
  Qwen2-72B-Instruct,0.9611069558713538
9
+ Sailor2-8B-Chat,0.8960359012715033
10
  Meta-Llama-3-8B-Instruct,0.8515332834704562
 
11
  Meta-Llama-3.1-70B-Instruct,0.9603590127150337
12
  Qwen2_5_3B_Instruct,0.8668661181750187
13
  SeaLLMs-v3-7B-Chat,0.9143605086013463
14
  Qwen2_5_72B_Instruct,0.9596110695587136
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.8706058339566193
16
  gemma-2-9b-it,0.9222139117427075
17
  Meta-Llama-3-70B-Instruct,0.9521316379955124
18
  Qwen2_5_14B_Instruct,0.9502617801047121
results/fundamental_nlp_tasks/zero_shot/cola.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.7497603068072867
8
  Qwen2-72B-Instruct,0.8341323106423778
9
  Sailor2-8B-Chat,0.7900287631831256
10
  Meta-Llama-3-8B-Instruct,0.6548418024928092
11
- MERaLiON-Llama-3-8B-Instruct,0.6174496644295302
12
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
13
  Qwen2_5_3B_Instruct,0.6644295302013423
14
  SeaLLMs-v3-7B-Chat,0.785234899328859
15
  Qwen2_5_72B_Instruct,0.8571428571428571
 
16
  gemma-2-9b-it,0.7938638542665388
17
  Meta-Llama-3-70B-Instruct,0.835091083413231
18
  Qwen2_5_14B_Instruct,0.8063279002876318
 
8
  Qwen2-72B-Instruct,0.8341323106423778
9
  Sailor2-8B-Chat,0.7900287631831256
10
  Meta-Llama-3-8B-Instruct,0.6548418024928092
 
11
  Meta-Llama-3.1-70B-Instruct,0.850431447746884
12
  Qwen2_5_3B_Instruct,0.6644295302013423
13
  SeaLLMs-v3-7B-Chat,0.785234899328859
14
  Qwen2_5_72B_Instruct,0.8571428571428571
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6174496644295302
16
  gemma-2-9b-it,0.7938638542665388
17
  Meta-Llama-3-70B-Instruct,0.835091083413231
18
  Qwen2_5_14B_Instruct,0.8063279002876318
results/fundamental_nlp_tasks/zero_shot/mnli.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.6045
8
  Qwen2-72B-Instruct,0.7925
9
  Sailor2-8B-Chat,0.664
10
  Meta-Llama-3-8B-Instruct,0.546
11
- MERaLiON-Llama-3-8B-Instruct,0.5375
12
  Meta-Llama-3.1-70B-Instruct,0.7015
13
  Qwen2_5_3B_Instruct,0.7465
14
  SeaLLMs-v3-7B-Chat,0.653
15
  Qwen2_5_72B_Instruct,0.8445
 
16
  gemma-2-9b-it,0.716
17
  Meta-Llama-3-70B-Instruct,0.6709421285692472
18
  Qwen2_5_14B_Instruct,0.818
 
8
  Qwen2-72B-Instruct,0.7925
9
  Sailor2-8B-Chat,0.664
10
  Meta-Llama-3-8B-Instruct,0.546
 
11
  Meta-Llama-3.1-70B-Instruct,0.7015
12
  Qwen2_5_3B_Instruct,0.7465
13
  SeaLLMs-v3-7B-Chat,0.653
14
  Qwen2_5_72B_Instruct,0.8445
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5375
16
  gemma-2-9b-it,0.716
17
  Meta-Llama-3-70B-Instruct,0.6709421285692472
18
  Qwen2_5_14B_Instruct,0.818
results/fundamental_nlp_tasks/zero_shot/mrpc.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.6838235294117647
8
  Qwen2-72B-Instruct,0.8063725490196079
9
  Sailor2-8B-Chat,0.7769607843137255
10
  Meta-Llama-3-8B-Instruct,0.678921568627451
11
- MERaLiON-Llama-3-8B-Instruct,0.6274509803921569
12
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
13
  Qwen2_5_3B_Instruct,0.5661764705882353
14
  SeaLLMs-v3-7B-Chat,0.7475490196078431
15
  Qwen2_5_72B_Instruct,0.8014705882352942
 
16
  gemma-2-9b-it,0.7401960784313726
17
  Meta-Llama-3-70B-Instruct,0.7598039215686274
18
  Qwen2_5_14B_Instruct,0.7794117647058824
 
8
  Qwen2-72B-Instruct,0.8063725490196079
9
  Sailor2-8B-Chat,0.7769607843137255
10
  Meta-Llama-3-8B-Instruct,0.678921568627451
 
11
  Meta-Llama-3.1-70B-Instruct,0.7696078431372549
12
  Qwen2_5_3B_Instruct,0.5661764705882353
13
  SeaLLMs-v3-7B-Chat,0.7475490196078431
14
  Qwen2_5_72B_Instruct,0.8014705882352942
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6274509803921569
16
  gemma-2-9b-it,0.7401960784313726
17
  Meta-Llama-3-70B-Instruct,0.7598039215686274
18
  Qwen2_5_14B_Instruct,0.7794117647058824
results/fundamental_nlp_tasks/zero_shot/ocnli.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5135593220338983
8
  Qwen2-72B-Instruct,0.7820338983050847
9
  Sailor2-8B-Chat,0.5569491525423729
10
  Meta-Llama-3-8B-Instruct,0.44033898305084745
11
- MERaLiON-Llama-3-8B-Instruct,0.4633898305084746
12
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
13
  Qwen2_5_3B_Instruct,0.6145762711864406
14
  SeaLLMs-v3-7B-Chat,0.5698305084745763
15
  Qwen2_5_72B_Instruct,0.7684745762711864
 
16
  gemma-2-9b-it,0.6189830508474576
17
  Meta-Llama-3-70B-Instruct,0.5928813559322034
18
  Qwen2_5_14B_Instruct,0.7538983050847458
 
8
  Qwen2-72B-Instruct,0.7820338983050847
9
  Sailor2-8B-Chat,0.5569491525423729
10
  Meta-Llama-3-8B-Instruct,0.44033898305084745
 
11
  Meta-Llama-3.1-70B-Instruct,0.6423728813559322
12
  Qwen2_5_3B_Instruct,0.6145762711864406
13
  SeaLLMs-v3-7B-Chat,0.5698305084745763
14
  Qwen2_5_72B_Instruct,0.7684745762711864
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.4633898305084746
16
  gemma-2-9b-it,0.6189830508474576
17
  Meta-Llama-3-70B-Instruct,0.5928813559322034
18
  Qwen2_5_14B_Instruct,0.7538983050847458
results/fundamental_nlp_tasks/zero_shot/qnli.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.6148636280431997
8
  Qwen2-72B-Instruct,0.8887058392824455
9
  Sailor2-8B-Chat,0.6822258832143511
10
  Meta-Llama-3-8B-Instruct,0.6025993044114956
11
- MERaLiON-Llama-3-8B-Instruct,0.6522057477576423
12
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
13
  Qwen2_5_3B_Instruct,0.7645982061138569
14
  SeaLLMs-v3-7B-Chat,0.7159070107999268
15
  Qwen2_5_72B_Instruct,0.9082921471718836
 
16
  gemma-2-9b-it,0.9070107999267801
17
  Meta-Llama-3-70B-Instruct,0.876807614863628
18
  Qwen2_5_14B_Instruct,0.9079260479589969
 
8
  Qwen2-72B-Instruct,0.8887058392824455
9
  Sailor2-8B-Chat,0.6822258832143511
10
  Meta-Llama-3-8B-Instruct,0.6025993044114956
 
11
  Meta-Llama-3.1-70B-Instruct,0.9026176093721399
12
  Qwen2_5_3B_Instruct,0.7645982061138569
13
  SeaLLMs-v3-7B-Chat,0.7159070107999268
14
  Qwen2_5_72B_Instruct,0.9082921471718836
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6522057477576423
16
  gemma-2-9b-it,0.9070107999267801
17
  Meta-Llama-3-70B-Instruct,0.876807614863628
18
  Qwen2_5_14B_Instruct,0.9079260479589969
results/fundamental_nlp_tasks/zero_shot/qqp.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.731
8
  Qwen2-72B-Instruct,0.8065
9
  Sailor2-8B-Chat,0.8205
10
  Meta-Llama-3-8B-Instruct,0.563
11
- MERaLiON-Llama-3-8B-Instruct,0.597
12
  Meta-Llama-3.1-70B-Instruct,0.815
13
  Qwen2_5_3B_Instruct,0.7415
14
  SeaLLMs-v3-7B-Chat,0.7625
15
  Qwen2_5_72B_Instruct,0.8315
 
16
  gemma-2-9b-it,0.7775
17
  Meta-Llama-3-70B-Instruct,0.7876082117239673
18
  Qwen2_5_14B_Instruct,0.8255
 
8
  Qwen2-72B-Instruct,0.8065
9
  Sailor2-8B-Chat,0.8205
10
  Meta-Llama-3-8B-Instruct,0.563
 
11
  Meta-Llama-3.1-70B-Instruct,0.815
12
  Qwen2_5_3B_Instruct,0.7415
13
  SeaLLMs-v3-7B-Chat,0.7625
14
  Qwen2_5_72B_Instruct,0.8315
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.597
16
  gemma-2-9b-it,0.7775
17
  Meta-Llama-3-70B-Instruct,0.7876082117239673
18
  Qwen2_5_14B_Instruct,0.8255
results/fundamental_nlp_tasks/zero_shot/rte.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.703971119133574
8
  Qwen2-72B-Instruct,0.8447653429602888
9
  Sailor2-8B-Chat,0.8122743682310469
10
  Meta-Llama-3-8B-Instruct,0.6173285198555957
11
- MERaLiON-Llama-3-8B-Instruct,0.6606498194945848
12
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
13
  Qwen2_5_3B_Instruct,0.779783393501805
14
  SeaLLMs-v3-7B-Chat,0.7870036101083032
15
  Qwen2_5_72B_Instruct,0.9025270758122743
 
16
  gemma-2-9b-it,0.7472924187725631
17
  Meta-Llama-3-70B-Instruct,0.8086642599277978
18
  Qwen2_5_14B_Instruct,0.8664259927797834
 
8
  Qwen2-72B-Instruct,0.8447653429602888
9
  Sailor2-8B-Chat,0.8122743682310469
10
  Meta-Llama-3-8B-Instruct,0.6173285198555957
 
11
  Meta-Llama-3.1-70B-Instruct,0.8483754512635379
12
  Qwen2_5_3B_Instruct,0.779783393501805
13
  SeaLLMs-v3-7B-Chat,0.7870036101083032
14
  Qwen2_5_72B_Instruct,0.9025270758122743
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6606498194945848
16
  gemma-2-9b-it,0.7472924187725631
17
  Meta-Llama-3-70B-Instruct,0.8086642599277978
18
  Qwen2_5_14B_Instruct,0.8664259927797834
results/fundamental_nlp_tasks/zero_shot/wnli.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.4647887323943662
8
  Qwen2-72B-Instruct,0.8873239436619719
9
  Sailor2-8B-Chat,0.5492957746478874
10
  Meta-Llama-3-8B-Instruct,0.4788732394366197
11
- MERaLiON-Llama-3-8B-Instruct,0.5492957746478874
12
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
13
  Qwen2_5_3B_Instruct,0.647887323943662
14
  SeaLLMs-v3-7B-Chat,0.5915492957746479
15
  Qwen2_5_72B_Instruct,0.8169014084507042
 
16
  gemma-2-9b-it,0.7746478873239436
17
  Meta-Llama-3-70B-Instruct,0.7887323943661971
18
  Qwen2_5_14B_Instruct,0.8309859154929577
 
8
  Qwen2-72B-Instruct,0.8873239436619719
9
  Sailor2-8B-Chat,0.5492957746478874
10
  Meta-Llama-3-8B-Instruct,0.4788732394366197
 
11
  Meta-Llama-3.1-70B-Instruct,0.8450704225352113
12
  Qwen2_5_3B_Instruct,0.647887323943662
13
  SeaLLMs-v3-7B-Chat,0.5915492957746479
14
  Qwen2_5_72B_Instruct,0.8169014084507042
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5492957746478874
16
  gemma-2-9b-it,0.7746478873239436
17
  Meta-Llama-3-70B-Instruct,0.7887323943661971
18
  Qwen2_5_14B_Instruct,0.8309859154929577
results/general_reasoning/zero_shot/c_eval.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.5971357409713575
8
  Qwen2-72B-Instruct,0.8312577833125778
9
  Sailor2-8B-Chat,0.5946450809464509
10
  Meta-Llama-3-8B-Instruct,0.4775840597758406
11
- MERaLiON-Llama-3-8B-Instruct,0.5205479452054794
12
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
13
  Qwen2_5_3B_Instruct,0.6537982565379825
14
  SeaLLMs-v3-7B-Chat,0.7658779576587795
15
  Qwen2_5_72B_Instruct,0.8325031133250311
 
16
  gemma-2-9b-it,0.5523038605230386
17
  Meta-Llama-3-70B-Instruct,0.6220423412204235
18
  Qwen2_5_14B_Instruct,0.7839352428393525
 
8
  Qwen2-72B-Instruct,0.8312577833125778
9
  Sailor2-8B-Chat,0.5946450809464509
10
  Meta-Llama-3-8B-Instruct,0.4775840597758406
 
11
  Meta-Llama-3.1-70B-Instruct,0.6612702366127023
12
  Qwen2_5_3B_Instruct,0.6537982565379825
13
  SeaLLMs-v3-7B-Chat,0.7658779576587795
14
  Qwen2_5_72B_Instruct,0.8325031133250311
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.5205479452054794
16
  gemma-2-9b-it,0.5523038605230386
17
  Meta-Llama-3-70B-Instruct,0.6220423412204235
18
  Qwen2_5_14B_Instruct,0.7839352428393525
results/general_reasoning/zero_shot/cmmlu_no_prompt.csv CHANGED
@@ -1 +1,13 @@
1
  Model,Accuracy
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7667932999481955
3
+ Meta-Llama-3.1-8B-Instruct,0.5438611638749784
4
+ llama3-8b-cpt-sea-lionv2.1-instruct,0.5098428596097393
5
+ Qwen2_5_7B_Instruct,0.7684337765498187
6
+ Qwen2_5_1_5B_Instruct,0.6070626834743568
7
+ Sailor2-8B-Chat,0.652909687446037
8
+ Meta-Llama-3-8B-Instruct,0.5214125366948713
9
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.555776204455189
10
+ gemma-2-9b-it,0.588154032118805
11
+ Qwen2_5_14B_Instruct,0.79951649110689
12
+ gemma2-9b-cpt-sea-lionv3-instruct,0.5871179416335693
13
+ Qwen2_5_0_5B_Instruct,0.42730098428596097
results/general_reasoning/zero_shot/indommlu_no_prompt.csv CHANGED
@@ -1,11 +1,15 @@
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,0.5483009546698712
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.559516656652647
4
  Qwen2_5_7B_Instruct,0.581814540356499
 
5
  Sailor2-8B-Chat,0.6342212430736365
6
  Meta-Llama-3-8B-Instruct,0.537686093864744
7
- MERaLiON-Llama-3-8B-Instruct,0.575806128580012
8
  SeaLLMs-v3-7B-Chat,0.5406235396221376
 
9
  gemma-2-9b-it,0.6210027371653648
 
10
  gemma2-9b-cpt-sea-lionv3-instruct,0.6258762267174044
 
11
  GPT4o_0513,0.7599305694639161
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,0.5434274651178316
3
  Meta-Llama-3.1-8B-Instruct,0.5483009546698712
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.559516656652647
5
  Qwen2_5_7B_Instruct,0.581814540356499
6
+ Qwen2_5_1_5B_Instruct,0.41337873022231125
7
  Sailor2-8B-Chat,0.6342212430736365
8
  Meta-Llama-3-8B-Instruct,0.537686093864744
 
9
  SeaLLMs-v3-7B-Chat,0.5406235396221376
10
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.575806128580012
11
  gemma-2-9b-it,0.6210027371653648
12
+ Qwen2_5_14B_Instruct,0.6356899659523333
13
  gemma2-9b-cpt-sea-lionv3-instruct,0.6258762267174044
14
+ Qwen2_5_0_5B_Instruct,0.3137058548634755
15
  GPT4o_0513,0.7599305694639161
results/general_reasoning/zero_shot/mmlu_no_prompt.csv CHANGED
@@ -1,10 +1,14 @@
1
  Model,Accuracy
 
2
  Meta-Llama-3.1-8B-Instruct,0.7056131569538792
3
  llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
4
  Qwen2_5_7B_Instruct,0.73936360386128
 
5
  Meta-Llama-3-8B-Instruct,0.6735788344654987
6
- MERaLiON-Llama-3-8B-Instruct,0.6988916696460493
7
  SeaLLMs-v3-7B-Chat,0.6913836253128351
 
8
  gemma-2-9b-it,0.740293171254916
 
9
  gemma2-9b-cpt-sea-lionv3-instruct,0.7372899535216303
 
10
  GPT4o_0513,0.871576689309975
 
1
  Model,Accuracy
2
+ Qwen2-7B-Instruct,0.7032534858777262
3
  Meta-Llama-3.1-8B-Instruct,0.7056131569538792
4
  llama3-8b-cpt-sea-lionv2.1-instruct,0.6454057919199142
5
  Qwen2_5_7B_Instruct,0.73936360386128
6
+ Sailor2-8B-Chat,0.6406149445834823
7
  Meta-Llama-3-8B-Instruct,0.6735788344654987
 
8
  SeaLLMs-v3-7B-Chat,0.6913836253128351
9
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.6988916696460493
10
  gemma-2-9b-it,0.740293171254916
11
+ Qwen2_5_14B_Instruct,0.7939220593493028
12
  gemma2-9b-cpt-sea-lionv3-instruct,0.7372899535216303
13
+ Qwen2_5_0_5B_Instruct,0.4267429388630676
14
  GPT4o_0513,0.871576689309975
results/general_reasoning/zero_shot/zbench.csv CHANGED
@@ -8,11 +8,11 @@ Qwen2_5_1_5B_Instruct,0.42424242424242425
8
  Qwen2-72B-Instruct,0.5757575757575758
9
  Sailor2-8B-Chat,0.5151515151515151
10
  Meta-Llama-3-8B-Instruct,0.3333333333333333
11
- MERaLiON-Llama-3-8B-Instruct,0.42424242424242425
12
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
13
  Qwen2_5_3B_Instruct,0.5757575757575758
14
  SeaLLMs-v3-7B-Chat,0.5454545454545454
15
  Qwen2_5_72B_Instruct,0.696969696969697
 
16
  gemma-2-9b-it,0.48484848484848486
17
  Meta-Llama-3-70B-Instruct,0.5151515151515151
18
  Qwen2_5_14B_Instruct,0.6666666666666666
 
8
  Qwen2-72B-Instruct,0.5757575757575758
9
  Sailor2-8B-Chat,0.5151515151515151
10
  Meta-Llama-3-8B-Instruct,0.3333333333333333
 
11
  Meta-Llama-3.1-70B-Instruct,0.48484848484848486
12
  Qwen2_5_3B_Instruct,0.5757575757575758
13
  SeaLLMs-v3-7B-Chat,0.5454545454545454
14
  Qwen2_5_72B_Instruct,0.696969696969697
15
+ meralion-merged-llama3-8b-sg-inst-avg-diff,0.42424242424242425
16
  gemma-2-9b-it,0.48484848484848486
17
  Meta-Llama-3-70B-Instruct,0.5151515151515151
18
  Qwen2_5_14B_Instruct,0.6666666666666666