---
# Model-card metadata (YAML front matter): license, pipeline tag, library,
# languages, datasets, and tags. The block must be enclosed by `---` lines so
# that it parses as YAML and is kept separate from the Markdown body below.
license: apache-2.0
pipeline_tag: text-generation
library_name: transformers
language:
- en
- am
- ar
- as
- az
- be
- bg
- bn
- br
- bs
- ca
- cs
- cy
- da
- de
- el
- eo
- es
- et
- eu
- fa
- ff
- fi
- fr
- fy
- ga
- gd
- gl
- gn
- gu
- ha
- he
- hi
- hr
- ht
- hu
- hy
- id
- ig
- is
- it
- ja
- jv
- ka
- kk
- km
- kn
- ko
- ku
- ky
- la
- lg
- li
- ln
- lo
- lt
- lv
- mg
- mk
- ml
- mn
- mr
- ms
- my
- ne
- nl
# Quoted on purpose: a bare `no` is read as boolean false by YAML 1.1 parsers.
- 'no'
- ns
- om
- or
- pa
- pl
- ps
- pt
- qu
- rm
- ro
- ru
- sa
- si
- sc
- sd
- sk
- sl
- so
- sq
- sr
- ss
- su
- sv
- sw
- ta
- te
- th
- tl
- tn
- tr
- ug
- uk
- ur
- uz
- vi
- wo
- xh
- yi
- yo
- zu
datasets:
- bigcode/programming-languages-keywords
- bigcode/the-stack-smol-xs
- nampdn-ai/tiny-textbooks
- xu-song/cc100-samples
- m-a-p/CodeFeedback-Filtered-Instruction
- nampdn-ai/tiny-codes
- ajibawa-2023/Maths-College
- microsoft/orca-math-word-problems-200k
- mlabonne/FineTome-100k
- arcee-ai/agent-data
- cognitivecomputations/SystemChat-2.0
- badrex/llm-emoji-dataset
tags:
- litgpt
- litdata
---
# tangled-llama-33m-32k-base-v0.1

## lm-evaluation-harness
| Tasks | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---|---|---|
| arc_challenge | 1 | none | 0 | acc | ↑ | 0.2065 | ± | 0.0118 |
| | | none | 0 | acc_norm | ↑ | 0.2338 | ± | 0.0124 |
| gsm8k | 3 | flexible-extract | 5 | exact_match | ↑ | 0.0144 | ± | 0.0033 |
| | | strict-match | 5 | exact_match | ↑ | 0.0000 | ± | 0.0000 |
| hellaswag | 1 | none | 0 | acc | ↑ | 0.2614 | ± | 0.0044 |
| | | none | 0 | acc_norm | ↑ | 0.2604 | ± | 0.0044 |
| mmlu | 2 | none | | acc | ↑ | 0.2280 | ± | 0.0035 |
| - humanities | 2 | none | | acc | ↑ | 0.2376 | ± | 0.0062 |
| - formal_logic | 1 | none | 0 | acc | ↑ | 0.2698 | ± | 0.0397 |
| - high_school_european_history | 1 | none | 0 | acc | ↑ | 0.2000 | ± | 0.0312 |
| - high_school_us_history | 1 | none | 0 | acc | ↑ | 0.2500 | ± | 0.0304 |
| - high_school_world_history | 1 | none | 0 | acc | ↑ | 0.2785 | ± | 0.0292 |
| - international_law | 1 | none | 0 | acc | ↑ | 0.2149 | ± | 0.0375 |
| - jurisprudence | 1 | none | 0 | acc | ↑ | 0.2407 | ± | 0.0413 |
| - logical_fallacies | 1 | none | 0 | acc | ↑ | 0.2270 | ± | 0.0329 |
| - moral_disputes | 1 | none | 0 | acc | ↑ | 0.2341 | ± | 0.0228 |
| - moral_scenarios | 1 | none | 0 | acc | ↑ | 0.2380 | ± | 0.0142 |
| - philosophy | 1 | none | 0 | acc | ↑ | 0.1833 | ± | 0.0220 |
| - prehistory | 1 | none | 0 | acc | ↑ | 0.2160 | ± | 0.0229 |
| - professional_law | 1 | none | 0 | acc | ↑ | 0.2399 | ± | 0.0109 |
| - world_religions | 1 | none | 0 | acc | ↑ | 0.3275 | ± | 0.0360 |
| - other | 2 | none | | acc | ↑ | 0.2481 | ± | 0.0077 |
| - business_ethics | 1 | none | 0 | acc | ↑ | 0.3100 | ± | 0.0465 |
| - clinical_knowledge | 1 | none | 0 | acc | ↑ | 0.2264 | ± | 0.0258 |
| - college_medicine | 1 | none | 0 | acc | ↑ | 0.2486 | ± | 0.0330 |
| - global_facts | 1 | none | 0 | acc | ↑ | 0.1700 | ± | 0.0378 |
| - human_aging | 1 | none | 0 | acc | ↑ | 0.3229 | ± | 0.0314 |
| - management | 1 | none | 0 | acc | ↑ | 0.1845 | ± | 0.0384 |
| - marketing | 1 | none | 0 | acc | ↑ | 0.3034 | ± | 0.0301 |
| - medical_genetics | 1 | none | 0 | acc | ↑ | 0.3100 | ± | 0.0465 |
| - miscellaneous | 1 | none | 0 | acc | ↑ | 0.2401 | ± | 0.0153 |
| - nutrition | 1 | none | 0 | acc | ↑ | 0.2418 | ± | 0.0245 |
| - professional_accounting | 1 | none | 0 | acc | ↑ | 0.2411 | ± | 0.0255 |
| - professional_medicine | 1 | none | 0 | acc | ↑ | 0.1838 | ± | 0.0235 |
| - virology | 1 | none | 0 | acc | ↑ | 0.2831 | ± | 0.0351 |
| - social sciences | 2 | none | | acc | ↑ | 0.2155 | ± | 0.0074 |
| - econometrics | 1 | none | 0 | acc | ↑ | 0.2281 | ± | 0.0395 |
| - high_school_geography | 1 | none | 0 | acc | ↑ | 0.1768 | ± | 0.0272 |
| - high_school_government_and_politics | 1 | none | 0 | acc | ↑ | 0.1969 | ± | 0.0287 |
| - high_school_macroeconomics | 1 | none | 0 | acc | ↑ | 0.2103 | ± | 0.0207 |
| - high_school_microeconomics | 1 | none | 0 | acc | ↑ | 0.2143 | ± | 0.0267 |
| - high_school_psychology | 1 | none | 0 | acc | ↑ | 0.1872 | ± | 0.0167 |
| - human_sexuality | 1 | none | 0 | acc | ↑ | 0.2672 | ± | 0.0388 |
| - professional_psychology | 1 | none | 0 | acc | ↑ | 0.2467 | ± | 0.0174 |
| - public_relations | 1 | none | 0 | acc | ↑ | 0.2182 | ± | 0.0396 |
| - security_studies | 1 | none | 0 | acc | ↑ | 0.1755 | ± | 0.0244 |
| - sociology | 1 | none | 0 | acc | ↑ | 0.2438 | ± | 0.0304 |
| - us_foreign_policy | 1 | none | 0 | acc | ↑ | 0.2700 | ± | 0.0446 |
| - stem | 2 | none | | acc | ↑ | 0.2058 | ± | 0.0072 |
| - abstract_algebra | 1 | none | 0 | acc | ↑ | 0.1700 | ± | 0.0378 |
| - anatomy | 1 | none | 0 | acc | ↑ | 0.1778 | ± | 0.0330 |
| - astronomy | 1 | none | 0 | acc | ↑ | 0.1776 | ± | 0.0311 |
| - college_biology | 1 | none | 0 | acc | ↑ | 0.2569 | ± | 0.0365 |
| - college_chemistry | 1 | none | 0 | acc | ↑ | 0.1900 | ± | 0.0394 |
| - college_computer_science | 1 | none | 0 | acc | ↑ | 0.2600 | ± | 0.0441 |
| - college_mathematics | 1 | none | 0 | acc | ↑ | 0.2100 | ± | 0.0409 |
| - college_physics | 1 | none | 0 | acc | ↑ | 0.2059 | ± | 0.0402 |
| - computer_security | 1 | none | 0 | acc | ↑ | 0.2400 | ± | 0.0429 |
| - conceptual_physics | 1 | none | 0 | acc | ↑ | 0.2681 | ± | 0.0290 |
| - electrical_engineering | 1 | none | 0 | acc | ↑ | 0.2345 | ± | 0.0353 |
| - elementary_mathematics | 1 | none | 0 | acc | ↑ | 0.2011 | ± | 0.0206 |
| - high_school_biology | 1 | none | 0 | acc | ↑ | 0.1806 | ± | 0.0219 |
| - high_school_chemistry | 1 | none | 0 | acc | ↑ | 0.1478 | ± | 0.0250 |
| - high_school_computer_science | 1 | none | 0 | acc | ↑ | 0.2400 | ± | 0.0429 |
| - high_school_mathematics | 1 | none | 0 | acc | ↑ | 0.2111 | ± | 0.0249 |
| - high_school_physics | 1 | none | 0 | acc | ↑ | 0.1788 | ± | 0.0313 |
| - high_school_statistics | 1 | none | 0 | acc | ↑ | 0.1620 | ± | 0.0251 |
| - machine_learning | 1 | none | 0 | acc | ↑ | 0.2768 | ± | 0.0425 |
| truthfulqa_mc2 | 2 | none | 0 | acc | ↑ | 0.4975 | ± | 0.0165 |
| winogrande | 1 | none | 0 | acc | ↑ | 0.5146 | ± | 0.0140 |

| Groups | Version | Filter | n-shot | Metric | | Value | | Stderr |
|---|---|---|---|---|---|---|---|---|
| mmlu | 2 | none | | acc | ↑ | 0.2280 | ± | 0.0035 |
| - humanities | 2 | none | | acc | ↑ | 0.2376 | ± | 0.0062 |
| - other | 2 | none | | acc | ↑ | 0.2481 | ± | 0.0077 |
| - social sciences | 2 | none | | acc | ↑ | 0.2155 | ± | 0.0074 |
| - stem | 2 | none | | acc | ↑ | 0.2058 | ± | 0.0072 |