LR2Bench / Crossword.json
vzl123's picture
Upload 7 files
2462ff4 verified
raw
history blame
2.67 kB
[
{
"Model": "llama-3.1-8b",
"CR": "61.3",
"S-Acc": "23.3",
"EM": "0.0",
"PM-0.5": "14.0",
"Tokens": "2887"
},
{
"Model": "llama-3.1-70b",
"CR": "77.3",
"S-Acc": "46.8",
"EM": "0.0",
"PM-0.5": "62.0",
"Tokens": "3071"
},
{
"Model": "llama-3.3-70b",
"CR": "85.3",
"S-Acc": "47.6",
"EM": "0.0",
"PM-0.5": "65.3",
"Tokens": "2612"
},
{
"Model": "mistral-7b",
"CR": "94.0",
"S-Acc": "23.0",
"EM": "0.0",
"PM-0.5": "6.7",
"Tokens": "3655"
},
{
"Model": "mistral-small-22b",
"CR": "98.7",
"S-Acc": "48.3",
"EM": "0.0",
"PM-0.5": "54.0",
"Tokens": "3134"
},
{
"Model": "mistral-large-123b",
"CR": "99.3",
"S-Acc": "62.8",
"EM": "2.0",
"PM-0.5": "86.0",
"Tokens": "3237"
},
{
"Model": "qwen-2.5-7b",
"CR": "98.7",
"S-Acc": "21.1",
"EM": "0.0",
"PM-0.5": "3.3",
"Tokens": "2441"
},
{
"Model": "qwen-2.5-32b",
"CR": "100.0",
"S-Acc": "34.6",
"EM": "0.0",
"PM-0.5": "20.0",
"Tokens": "2560"
},
{
"Model": "qwen-2.5-72b",
"CR": "100.0",
"S-Acc": "44.1",
"EM": "0.0",
"PM-0.5": "36.7",
"Tokens": "2734"
},
{
"Model": "qwq-32b",
"CR": "80.0",
"S-Acc": "30.2",
"EM": "0.0",
"PM-0.5": "18.0",
"Tokens": "4816"
},
{
"Model": "deepseek-R1",
"CR": "100.0",
"S-Acc": "75.3",
"EM": "16.7",
"PM-0.5": "94.0",
"Tokens": "9809"
},
{
"Model": "gemini-2.0-exp",
"CR": "98.7",
"S-Acc": "61.6",
"EM": "0.0",
"PM-0.5": "83.3",
"Tokens": "2555"
},
{
"Model": "gemini-2.0-thinking",
"CR": "94.7",
"S-Acc": "57.7",
"EM": "1.3",
"PM-0.5": "79.3",
"Tokens": "2648"
},
{
"Model": "openai-gpt-4o",
"CR": "100.0",
"S-Acc": "63.0",
"EM": "1.3",
"PM-0.5": "86.7",
"Tokens": "1726"
},
{
"Model": "openai-o1-mini",
"CR": "95.3",
"S-Acc": "45.5",
"EM": "1.3",
"PM-0.5": "54.0",
"Tokens": "7840"
},
{
"Model": "openai-o1-preview",
"CR": "98.0",
"S-Acc": "77.7",
"EM": "24.7",
"PM-0.5": "89.3",
"Tokens": "10098"
}
]