Steelskull
committed on
Update README.md
Browse files
README.md
CHANGED
@@ -1247,13 +1247,12 @@ a:hover .link-arrow {
|
|
1247 |
</div>
|
1248 |
</div>
|
1249 |
-->
|
1250 |
-
<!-- Open LLM-Benchmark Results (Temporarily Hidden)
|
1251 |
<h2>Open LLM-Benchmark Results:</h2>
|
1252 |
<div class="benchmark-container">
|
1253 |
<div class="benchmark-notification">
|
1254 |
<div class="notification-content">
|
1255 |
<span class="notification-text">
|
1256 |
-
Average Score: 43.
|
1257 |
<a href="https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?rankingMode=dynamic" target="_blank" class="benchmark-link">
|
1258 |
View Full Leaderboard →
|
1259 |
</a>
|
@@ -1264,60 +1263,59 @@ a:hover .link-arrow {
|
|
1264 |
<div class="progress-metric">
|
1265 |
<div class="progress-label">
|
1266 |
<span>IFEval</span>
|
1267 |
-
<span class="progress-value">
|
1268 |
</div>
|
1269 |
<div class="progress-bar">
|
1270 |
-
<div class="progress-fill" style="width:
|
1271 |
</div>
|
1272 |
</div>
|
1273 |
<div class="progress-metric">
|
1274 |
<div class="progress-label">
|
1275 |
<span>BBH</span>
|
1276 |
-
<span class="progress-value">56.
|
1277 |
</div>
|
1278 |
<div class="progress-bar">
|
1279 |
-
<div class="progress-fill" style="width: 56.
|
1280 |
</div>
|
1281 |
</div>
|
1282 |
<div class="progress-metric">
|
1283 |
<div class="progress-label">
|
1284 |
<span>MATH</span>
|
1285 |
-
<span class="progress-value">
|
1286 |
</div>
|
1287 |
<div class="progress-bar">
|
1288 |
-
<div class="progress-fill" style="width:
|
1289 |
</div>
|
1290 |
</div>
|
1291 |
<div class="progress-metric">
|
1292 |
<div class="progress-label">
|
1293 |
<span>GPQA</span>
|
1294 |
-
<span class="progress-value">29.
|
1295 |
</div>
|
1296 |
<div class="progress-bar">
|
1297 |
-
<div class="progress-fill" style="width: 29.
|
1298 |
</div>
|
1299 |
</div>
|
1300 |
<div class="progress-metric">
|
1301 |
<div class="progress-label">
|
1302 |
<span>MUSR</span>
|
1303 |
-
<span class="progress-value">
|
1304 |
</div>
|
1305 |
<div class="progress-bar">
|
1306 |
-
<div class="progress-fill" style="width:
|
1307 |
</div>
|
1308 |
</div>
|
1309 |
<div class="progress-metric">
|
1310 |
<div class="progress-label">
|
1311 |
<span>MMLU-Pro</span>
|
1312 |
-
<span class="progress-value">
|
1313 |
</div>
|
1314 |
<div class="progress-bar">
|
1315 |
-
<div class="progress-fill" style="width:
|
1316 |
</div>
|
1317 |
</div>
|
1318 |
</div>
|
1319 |
</div>
|
1320 |
-
-->
|
1321 |
<div class="section-container">
|
1322 |
<h2>Recommended Templates & Prompts</h2>
|
1323 |
<div class="template-card">
|
@@ -1384,19 +1382,4 @@ a:hover .link-arrow {
|
|
1384 |
</div>
|
1385 |
</div>
|
1386 |
</body>
|
1387 |
-
</html>
|
1388 |
-
|
1389 |
-
# [Open LLM Leaderboard Evaluation Results](https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard)
|
1390 |
-
Detailed results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/Steelskull__L3.3-Nevoria-R1-70b-details)!
|
1391 |
-
Summarized results can be found [here](https://huggingface.co/datasets/open-llm-leaderboard/contents/viewer/default/train?q=Steelskull%2FL3.3-Nevoria-R1-70b&sort[column]=Average%20%E2%AC%86%EF%B8%8F&sort[direction]=desc)!
|
1392 |
-
|
1393 |
-
| Metric |Value (%)|
|
1394 |
-
|-------------------|--------:|
|
1395 |
-
|**Average** | 43.68|
|
1396 |
-
|IFEval (0-Shot) | 60.24|
|
1397 |
-
|BBH (3-Shot) | 56.17|
|
1398 |
-
|MATH Lvl 5 (4-Shot)| 46.68|
|
1399 |
-
|GPQA (0-shot) | 29.19|
|
1400 |
-
|MuSR (0-shot) | 20.19|
|
1401 |
-
|MMLU-PRO (5-shot) | 49.59|
|
1402 |
-
|
|
|
1247 |
</div>
|
1248 |
</div>
|
1249 |
-->
|
|
|
1250 |
<h2>Open LLM-Benchmark Results:</h2>
|
1251 |
<div class="benchmark-container">
|
1252 |
<div class="benchmark-notification">
|
1253 |
<div class="notification-content">
|
1254 |
<span class="notification-text">
|
1255 |
+
Average Score: 43.68%
|
1256 |
<a href="https://huggingface.co/spaces/open-llm-leaderboard/open_llm_leaderboard#/?rankingMode=dynamic" target="_blank" class="benchmark-link">
|
1257 |
View Full Leaderboard →
|
1258 |
</a>
|
|
|
1263 |
<div class="progress-metric">
|
1264 |
<div class="progress-label">
|
1265 |
<span>IFEval</span>
|
1266 |
+
<span class="progress-value">60.24%</span>
|
1267 |
</div>
|
1268 |
<div class="progress-bar">
|
1269 |
+
<div class="progress-fill" style="width: 60.24%"></div>
|
1270 |
</div>
|
1271 |
</div>
|
1272 |
<div class="progress-metric">
|
1273 |
<div class="progress-label">
|
1274 |
<span>BBH</span>
|
1275 |
+
<span class="progress-value">56.17%</span>
|
1276 |
</div>
|
1277 |
<div class="progress-bar">
|
1278 |
+
<div class="progress-fill" style="width: 56.17%"></div>
|
1279 |
</div>
|
1280 |
</div>
|
1281 |
<div class="progress-metric">
|
1282 |
<div class="progress-label">
|
1283 |
<span>MATH</span>
|
1284 |
+
<span class="progress-value">46.68%</span>
|
1285 |
</div>
|
1286 |
<div class="progress-bar">
|
1287 |
+
<div class="progress-fill" style="width: 46.68%"></div>
|
1288 |
</div>
|
1289 |
</div>
|
1290 |
<div class="progress-metric">
|
1291 |
<div class="progress-label">
|
1292 |
<span>GPQA</span>
|
1293 |
+
<span class="progress-value">29.19%</span>
|
1294 |
</div>
|
1295 |
<div class="progress-bar">
|
1296 |
+
<div class="progress-fill" style="width: 29.19%"></div>
|
1297 |
</div>
|
1298 |
</div>
|
1299 |
<div class="progress-metric">
|
1300 |
<div class="progress-label">
|
1301 |
<span>MUSR</span>
|
1302 |
+
<span class="progress-value">20.19%</span>
|
1303 |
</div>
|
1304 |
<div class="progress-bar">
|
1305 |
+
<div class="progress-fill" style="width: 20.19%"></div>
|
1306 |
</div>
|
1307 |
</div>
|
1308 |
<div class="progress-metric">
|
1309 |
<div class="progress-label">
|
1310 |
<span>MMLU-Pro</span>
|
1311 |
+
<span class="progress-value">49.59%</span>
|
1312 |
</div>
|
1313 |
<div class="progress-bar">
|
1314 |
+
<div class="progress-fill" style="width: 49.59%"></div>
|
1315 |
</div>
|
1316 |
</div>
|
1317 |
</div>
|
1318 |
</div>
|
|
|
1319 |
<div class="section-container">
|
1320 |
<h2>Recommended Templates & Prompts</h2>
|
1321 |
<div class="template-card">
|
|
|
1382 |
</div>
|
1383 |
</div>
|
1384 |
</body>
|
1385 |
+
</html>
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|