flunardelli committed on
Commit
bba9479
·
verified ·
1 Parent(s): 0985eef

Upload 3 files

Browse files
llm_metaeval_eval_harness_mmlu.ipynb CHANGED
@@ -13,189 +13,9 @@
13
  "cell_type": "code",
14
  "execution_count": null,
15
  "metadata": {
16
- "colab": {
17
- "base_uri": "https://localhost:8080/"
18
- },
19
- "id": "kGW7vfRkrqHe",
20
- "outputId": "a51e7052-6e2d-431e-a46b-63d92254c8cc"
21
  },
22
- "outputs": [
23
- {
24
- "output_type": "stream",
25
- "name": "stdout",
26
- "text": [
27
- "Collecting lm_eval@ git+https://github.com/EleutherAI/[email protected] (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
28
- " Cloning https://github.com/EleutherAI/lm-evaluation-harness.git (to revision v0.4.6) to /tmp/pip-install-pwn3euso/lm-eval_15e9ff6cb88d46aa9b65c222332a3c3b\n",
29
- " Running command git clone --filter=blob:none --quiet https://github.com/EleutherAI/lm-evaluation-harness.git /tmp/pip-install-pwn3euso/lm-eval_15e9ff6cb88d46aa9b65c222332a3c3b\n",
30
- " Running command git checkout -q 9d36354e6422db94c374dc476c093b1495370983\n",
31
- " Resolved https://github.com/EleutherAI/lm-evaluation-harness.git to commit 9d36354e6422db94c374dc476c093b1495370983\n",
32
- " Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
33
- " Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
34
- " Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
35
- "Requirement already satisfied: accelerate==1.1.1 in /usr/local/lib/python3.10/dist-packages (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.1.1)\n",
36
- "Collecting bitsandbytes==0.44.1 (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 2))\n",
37
- " Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)\n",
38
- "Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (0.26.2)\n",
39
- "Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.26.4)\n",
40
- "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (24.2)\n",
41
- "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (5.9.5)\n",
42
- "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (6.0.2)\n",
43
- "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (0.4.5)\n",
44
- "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (2.5.1+cu121)\n",
45
- "Collecting evaluate (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
46
- " Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)\n",
47
- "Collecting datasets>=2.16.0 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
48
- " Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
49
- "Collecting jsonlines (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
50
- " Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)\n",
51
- "Requirement already satisfied: numexpr in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.10.1)\n",
52
- "Requirement already satisfied: peft>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.13.2)\n",
53
- "Collecting pybind11>=2.6.2 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
54
- " Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)\n",
55
- "Collecting pytablewriter (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
56
- " Downloading pytablewriter-1.2.0-py3-none-any.whl.metadata (37 kB)\n",
57
- "Collecting rouge-score>=0.0.4 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
58
- " Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
59
- " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
60
- "Collecting sacrebleu>=1.5.0 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
61
- " Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)\n",
62
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.8/51.8 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
63
- "\u001b[?25hRequirement already satisfied: scikit-learn>=0.24.1 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.5.2)\n",
64
- "Collecting sqlitedict (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
65
- " Downloading sqlitedict-2.1.0.tar.gz (21 kB)\n",
66
- " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
67
- "Collecting tqdm-multiprocess (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
68
- " Downloading tqdm_multiprocess-0.0.11-py3-none-any.whl.metadata (5.7 kB)\n",
69
- "Requirement already satisfied: transformers>=4.1 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.46.2)\n",
70
- "Collecting zstandard (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
71
- " Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n",
72
- "Collecting dill (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
73
- " Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)\n",
74
- "Collecting word2number (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
75
- " Downloading word2number-1.1.zip (9.7 kB)\n",
76
- " Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
77
- "Requirement already satisfied: more_itertools in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (10.5.0)\n",
78
- "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.16.1)\n",
79
- "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (17.0.0)\n",
80
- "Collecting dill (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
81
- " Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
82
- "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.2.2)\n",
83
- "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.32.3)\n",
84
- "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.66.6)\n",
85
- "Collecting xxhash (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
86
- " Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
87
- "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
88
- " Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
89
- "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
90
- " Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
91
- "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.11.2)\n",
92
- "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (4.12.2)\n",
93
- "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.4.0)\n",
94
- "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.9.1)\n",
95
- "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.16.0)\n",
96
- "Collecting portalocker (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
97
- " Downloading portalocker-3.0.0-py3-none-any.whl.metadata (8.5 kB)\n",
98
- "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.9.11)\n",
99
- "Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.9.0)\n",
100
- "Collecting colorama (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
101
- " Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n",
102
- "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (5.3.0)\n",
103
- "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.13.1)\n",
104
- "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.4.2)\n",
105
- "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.5.0)\n",
106
- "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.4.2)\n",
107
- "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.1.4)\n",
108
- "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.13.1)\n",
109
- "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.3.0)\n",
110
- "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.20.3)\n",
111
- "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonlines->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (24.2.0)\n",
112
- "Requirement already satisfied: setuptools>=38.3.0 in /usr/local/lib/python3.10/dist-packages (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (75.1.0)\n",
113
- "Collecting DataProperty<2,>=1.0.1 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
114
- " Downloading DataProperty-1.0.1-py3-none-any.whl.metadata (11 kB)\n",
115
- "Collecting mbstrdecoder<2,>=1.0.0 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
116
- " Downloading mbstrdecoder-1.1.3-py3-none-any.whl.metadata (4.0 kB)\n",
117
- "Collecting pathvalidate<4,>=2.3.0 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
118
- " Downloading pathvalidate-3.2.1-py3-none-any.whl.metadata (12 kB)\n",
119
- "Collecting tabledata<2,>=1.3.1 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
120
- " Downloading tabledata-1.3.3-py3-none-any.whl.metadata (3.7 kB)\n",
121
- "Collecting tcolorpy<1,>=0.0.5 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
122
- " Downloading tcolorpy-0.1.6-py3-none-any.whl.metadata (6.4 kB)\n",
123
- "Collecting typepy<2,>=1.3.2 (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
124
- " Downloading typepy-1.3.2-py3-none-any.whl.metadata (9.3 kB)\n",
125
- "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.4.3)\n",
126
- "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.3.1)\n",
127
- "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.5.0)\n",
128
- "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (6.1.0)\n",
129
- "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.2.0)\n",
130
- "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.17.2)\n",
131
- "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.0.3)\n",
132
- "Requirement already satisfied: chardet<6,>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from mbstrdecoder<2,>=1.0.0->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (5.2.0)\n",
133
- "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.4.0)\n",
134
- "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.10)\n",
135
- "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.2.3)\n",
136
- "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.8.30)\n",
137
- "Requirement already satisfied: python-dateutil<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.8.2)\n",
138
- "Requirement already satisfied: pytz>=2018.9 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.2)\n",
139
- "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.0.2)\n",
140
- "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (8.1.7)\n",
141
- "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.2)\n",
142
- "Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)\n",
143
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.4/122.4 MB\u001b[0m \u001b[31m18.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
144
- "\u001b[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
145
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
146
- "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
147
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
148
- "\u001b[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)\n",
149
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.0/84.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
150
- "\u001b[?25hDownloading pybind11-2.13.6-py3-none-any.whl (243 kB)\n",
151
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.3/243.3 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
152
- "\u001b[?25hDownloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)\n",
153
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.0/104.0 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
154
- "\u001b[?25hDownloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)\n",
155
- "Downloading pytablewriter-1.2.0-py3-none-any.whl (111 kB)\n",
156
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
157
- "\u001b[?25hDownloading tqdm_multiprocess-0.0.11-py3-none-any.whl (9.8 kB)\n",
158
- "Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n",
159
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m116.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
160
- "\u001b[?25hDownloading DataProperty-1.0.1-py3-none-any.whl (27 kB)\n",
161
- "Downloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
162
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
163
- "\u001b[?25hDownloading mbstrdecoder-1.1.3-py3-none-any.whl (7.8 kB)\n",
164
- "Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
165
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
166
- "\u001b[?25hDownloading pathvalidate-3.2.1-py3-none-any.whl (23 kB)\n",
167
- "Downloading tabledata-1.3.3-py3-none-any.whl (11 kB)\n",
168
- "Downloading tcolorpy-0.1.6-py3-none-any.whl (8.1 kB)\n",
169
- "Downloading typepy-1.3.2-py3-none-any.whl (31 kB)\n",
170
- "Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
171
- "Downloading portalocker-3.0.0-py3-none-any.whl (19 kB)\n",
172
- "Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
173
- "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
174
- "\u001b[?25hBuilding wheels for collected packages: lm_eval, rouge-score, sqlitedict, word2number\n",
175
- " Building wheel for lm_eval (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
176
- " Created wheel for lm_eval: filename=lm_eval-0.4.5-py3-none-any.whl size=2451555 sha256=eb0bd7b20cd5649e4d6ae45d6f8857dc7d4201219e5f1fb7f05857d0ce40dafd\n",
177
- " Stored in directory: /tmp/pip-ephem-wheel-cache-j3ph5vqa/wheels/c0/f5/fd/b1db03e4fd2a4661835862d036a87a42bf868abe99903e333d\n",
178
- " Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
179
- " Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=4de791eeae05926c2d1b4219891a96ab45504e46c483cd51a238bca08943b392\n",
180
- " Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
181
- " Building wheel for sqlitedict (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
182
- " Created wheel for sqlitedict: filename=sqlitedict-2.1.0-py3-none-any.whl size=16864 sha256=18e74dd174299b493b0bd17c4980b9f5a6c3d25b1534eb5ed9aee6508528b32c\n",
183
- " Stored in directory: /root/.cache/pip/wheels/79/d6/e7/304e0e6cb2221022c26d8161f7c23cd4f259a9e41e8bbcfabd\n",
184
- " Building wheel for word2number (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
185
- " Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5568 sha256=3bff6053337559ad74336e3aa7d882744e338bd917ee99abc64e06dd30c22e8f\n",
186
- " Stored in directory: /root/.cache/pip/wheels/84/ff/26/d3cfbd971e96c5aa3737ecfced81628830d7359b55fbb8ca3b\n",
187
- "Successfully built lm_eval rouge-score sqlitedict word2number\n",
188
- "Installing collected packages: word2number, sqlitedict, zstandard, xxhash, tcolorpy, pybind11, portalocker, pathvalidate, mbstrdecoder, jsonlines, fsspec, dill, colorama, typepy, tqdm-multiprocess, sacrebleu, rouge-score, multiprocess, bitsandbytes, DataProperty, tabledata, datasets, pytablewriter, evaluate, lm_eval\n",
189
- " Attempting uninstall: fsspec\n",
190
- " Found existing installation: fsspec 2024.10.0\n",
191
- " Uninstalling fsspec-2024.10.0:\n",
192
- " Successfully uninstalled fsspec-2024.10.0\n",
193
- "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
194
- "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
195
- "\u001b[0mSuccessfully installed DataProperty-1.0.1 bitsandbytes-0.44.1 colorama-0.4.6 datasets-3.1.0 dill-0.3.8 evaluate-0.4.3 fsspec-2024.9.0 jsonlines-4.0.0 lm_eval-0.4.5 mbstrdecoder-1.1.3 multiprocess-0.70.16 pathvalidate-3.2.1 portalocker-3.0.0 pybind11-2.13.6 pytablewriter-1.2.0 rouge-score-0.1.2 sacrebleu-2.4.3 sqlitedict-2.1.0 tabledata-1.3.3 tcolorpy-0.1.6 tqdm-multiprocess-0.0.11 typepy-1.3.2 word2number-1.1 xxhash-3.5.0 zstandard-0.23.0\n"
196
- ]
197
- }
198
- ],
199
  "source": [
200
  "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
201
  ]
@@ -312,42 +132,9 @@
312
  "cell_type": "code",
313
  "execution_count": null,
314
  "metadata": {
315
- "id": "uMoitxJkHerH",
316
- "colab": {
317
- "base_uri": "https://localhost:8080/",
318
- "height": 77,
319
- "referenced_widgets": [
320
- "7b825d3c10be4fa6abd7c115f8e4e1f2",
321
- "d426e85f0ce641d381d52927df68c278",
322
- "8f30879a15de4244adea101f6dc05c35",
323
- "96507b4059764b0493bbb286f4cd1c3a",
324
- "21cc893be48e460ab6e83e0f685249c2",
325
- "047db71ca65846f1bacf16fbcd9c40f8",
326
- "3813240ddf704b91ac556c05fe7499f7",
327
- "46f47846e9cd4b9b9cf321247fe0e518",
328
- "922f354c9c5b4c9ebae6ad0e1c87b760",
329
- "c1bdaff966f84b949b22b650311b6528",
330
- "f16a675994434ddb8b5983faf193b381"
331
- ]
332
- },
333
- "outputId": "b833672c-b855-494d-f480-8417ef199be8"
334
  },
335
- "outputs": [
336
- {
337
- "output_type": "display_data",
338
- "data": {
339
- "text/plain": [
340
- "samples_mmlu_all_2024-12-05T10-07-32.229420.jsonl: 0%| | 0.00/89.9M [00:00<?, ?B/s]"
341
- ],
342
- "application/vnd.jupyter.widget-view+json": {
343
- "version_major": 2,
344
- "version_minor": 0,
345
- "model_id": "7b825d3c10be4fa6abd7c115f8e4e1f2"
346
- }
347
- },
348
- "metadata": {}
349
- }
350
- ],
351
  "source": [
352
  "hf_upload_folder(BASE_FOLDER)"
353
  ]
@@ -372,42 +159,9 @@
372
  "cell_type": "code",
373
  "execution_count": null,
374
  "metadata": {
375
- "id": "eIUOqu5sHfkM",
376
- "colab": {
377
- "base_uri": "https://localhost:8080/",
378
- "height": 77,
379
- "referenced_widgets": [
380
- "79e90f6090ee493890acbf26292dd997",
381
- "d245bcee114a4216bbfc6a1999daca08",
382
- "e2acb9e61cec4083b3113415a205df12",
383
- "08a63fea880840a2a472008d98bb03d2",
384
- "004d4e339ec840c8bb748f4fdcef663a",
385
- "3f9668c6708247f4b65ae9204b5df832",
386
- "b1189ac61af44228a2e52026e73b0d2e",
387
- "8c89d406ae534cf0a7ef99eba4263b2b",
388
- "8adaf7159dcf4b318c2fc5930dd8633d",
389
- "24bd51fe6e81460f9a9c99a1961a4107",
390
- "2b0ffc94bfca468a89f0799cbef29ee5"
391
- ]
392
- },
393
- "outputId": "50a091b8-93f4-48dd-93ec-6839b33fbd7e"
394
  },
395
- "outputs": [
396
- {
397
- "output_type": "display_data",
398
- "data": {
399
- "text/plain": [
400
- "samples_mmlu_all_2024-12-05T10-28-27.223863.jsonl: 0%| | 0.00/89.9M [00:00<?, ?B/s]"
401
- ],
402
- "application/vnd.jupyter.widget-view+json": {
403
- "version_major": 2,
404
- "version_minor": 0,
405
- "model_id": "79e90f6090ee493890acbf26292dd997"
406
- }
407
- },
408
- "metadata": {}
409
- }
410
- ],
411
  "source": [
412
  "hf_upload_folder(BASE_FOLDER)"
413
  ]
@@ -432,42 +186,9 @@
432
  "cell_type": "code",
433
  "execution_count": null,
434
  "metadata": {
435
- "id": "xsL82Q4SHgMn",
436
- "colab": {
437
- "base_uri": "https://localhost:8080/",
438
- "height": 77,
439
- "referenced_widgets": [
440
- "c2b881fabd4f4758bd9aca516a579b4c",
441
- "6ab81736ee0643c7ae5e20978914fef4",
442
- "84ee9c147755422fb74229a2817ad35a",
443
- "35025439e8e545d28ee9f3c51df4f1bb",
444
- "da989b5fe95449f6928355cd317d7f4a",
445
- "ed8214d39c2e4e47a7b32e1152597642",
446
- "6210d27773144f78aed58c4ad9aaa0a2",
447
- "0e65b64cc85c4b52aa48c11f65ccceeb",
448
- "85504ad2f7be45e1a828b5077905dad5",
449
- "a638974916d74c239e287002bc1d089a",
450
- "a6fcb58400a54716bda1e4dc511add95"
451
- ]
452
- },
453
- "outputId": "29244830-d996-40bf-9f1f-77536ffe10f7"
454
  },
455
- "outputs": [
456
- {
457
- "output_type": "display_data",
458
- "data": {
459
- "text/plain": [
460
- "samples_mmlu_all_2024-12-05T11-08-04.943164.jsonl: 0%| | 0.00/89.9M [00:00<?, ?B/s]"
461
- ],
462
- "application/vnd.jupyter.widget-view+json": {
463
- "version_major": 2,
464
- "version_minor": 0,
465
- "model_id": "c2b881fabd4f4758bd9aca516a579b4c"
466
- }
467
- },
468
- "metadata": {}
469
- }
470
- ],
471
  "source": [
472
  "hf_upload_folder(BASE_FOLDER)"
473
  ]
@@ -486,21 +207,9 @@
486
  "execution_count": null,
487
  "metadata": {
488
  "collapsed": true,
489
- "id": "ilu9_ulWTy3p",
490
- "colab": {
491
- "base_uri": "https://localhost:8080/"
492
- },
493
- "outputId": "b87c2a88-022e-40c5-861b-90c51b8affbe"
494
  },
495
- "outputs": [
496
- {
497
- "output_type": "stream",
498
- "name": "stdout",
499
- "text": [
500
- "^C\n"
501
- ]
502
- }
503
- ],
504
  "source": [
505
  "!lm_eval \\\n",
506
  "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
@@ -514,22 +223,9 @@
514
  "cell_type": "code",
515
  "execution_count": null,
516
  "metadata": {
517
- "id": "jE5r8gVDHhAz",
518
- "colab": {
519
- "base_uri": "https://localhost:8080/"
520
- },
521
- "outputId": "23da6a08-c279-4d2c-96b1-d2d849c77d3d"
522
  },
523
- "outputs": [
524
- {
525
- "output_type": "stream",
526
- "name": "stderr",
527
- "text": [
528
- "No files have been modified since last commit. Skipping to prevent empty commit.\n",
529
- "WARNING:huggingface_hub.hf_api:No files have been modified since last commit. Skipping to prevent empty commit.\n"
530
- ]
531
- }
532
- ],
533
  "source": [
534
  "hf_upload_folder(BASE_FOLDER)"
535
  ]
@@ -548,1036 +244,6 @@
548
  },
549
  "language_info": {
550
  "name": "python"
551
- },
552
- "widgets": {
553
- "application/vnd.jupyter.widget-state+json": {
554
- "7b825d3c10be4fa6abd7c115f8e4e1f2": {
555
- "model_module": "@jupyter-widgets/controls",
556
- "model_name": "HBoxModel",
557
- "model_module_version": "1.5.0",
558
- "state": {
559
- "_dom_classes": [],
560
- "_model_module": "@jupyter-widgets/controls",
561
- "_model_module_version": "1.5.0",
562
- "_model_name": "HBoxModel",
563
- "_view_count": null,
564
- "_view_module": "@jupyter-widgets/controls",
565
- "_view_module_version": "1.5.0",
566
- "_view_name": "HBoxView",
567
- "box_style": "",
568
- "children": [
569
- "IPY_MODEL_d426e85f0ce641d381d52927df68c278",
570
- "IPY_MODEL_8f30879a15de4244adea101f6dc05c35",
571
- "IPY_MODEL_96507b4059764b0493bbb286f4cd1c3a"
572
- ],
573
- "layout": "IPY_MODEL_21cc893be48e460ab6e83e0f685249c2"
574
- }
575
- },
576
- "d426e85f0ce641d381d52927df68c278": {
577
- "model_module": "@jupyter-widgets/controls",
578
- "model_name": "HTMLModel",
579
- "model_module_version": "1.5.0",
580
- "state": {
581
- "_dom_classes": [],
582
- "_model_module": "@jupyter-widgets/controls",
583
- "_model_module_version": "1.5.0",
584
- "_model_name": "HTMLModel",
585
- "_view_count": null,
586
- "_view_module": "@jupyter-widgets/controls",
587
- "_view_module_version": "1.5.0",
588
- "_view_name": "HTMLView",
589
- "description": "",
590
- "description_tooltip": null,
591
- "layout": "IPY_MODEL_047db71ca65846f1bacf16fbcd9c40f8",
592
- "placeholder": "​",
593
- "style": "IPY_MODEL_3813240ddf704b91ac556c05fe7499f7",
594
- "value": "samples_mmlu_all_2024-12-05T10-07-32.229420.jsonl: 100%"
595
- }
596
- },
597
- "8f30879a15de4244adea101f6dc05c35": {
598
- "model_module": "@jupyter-widgets/controls",
599
- "model_name": "FloatProgressModel",
600
- "model_module_version": "1.5.0",
601
- "state": {
602
- "_dom_classes": [],
603
- "_model_module": "@jupyter-widgets/controls",
604
- "_model_module_version": "1.5.0",
605
- "_model_name": "FloatProgressModel",
606
- "_view_count": null,
607
- "_view_module": "@jupyter-widgets/controls",
608
- "_view_module_version": "1.5.0",
609
- "_view_name": "ProgressView",
610
- "bar_style": "success",
611
- "description": "",
612
- "description_tooltip": null,
613
- "layout": "IPY_MODEL_46f47846e9cd4b9b9cf321247fe0e518",
614
- "max": 89903482,
615
- "min": 0,
616
- "orientation": "horizontal",
617
- "style": "IPY_MODEL_922f354c9c5b4c9ebae6ad0e1c87b760",
618
- "value": 89903482
619
- }
620
- },
621
- "96507b4059764b0493bbb286f4cd1c3a": {
622
- "model_module": "@jupyter-widgets/controls",
623
- "model_name": "HTMLModel",
624
- "model_module_version": "1.5.0",
625
- "state": {
626
- "_dom_classes": [],
627
- "_model_module": "@jupyter-widgets/controls",
628
- "_model_module_version": "1.5.0",
629
- "_model_name": "HTMLModel",
630
- "_view_count": null,
631
- "_view_module": "@jupyter-widgets/controls",
632
- "_view_module_version": "1.5.0",
633
- "_view_name": "HTMLView",
634
- "description": "",
635
- "description_tooltip": null,
636
- "layout": "IPY_MODEL_c1bdaff966f84b949b22b650311b6528",
637
- "placeholder": "​",
638
- "style": "IPY_MODEL_f16a675994434ddb8b5983faf193b381",
639
- "value": " 89.9M/89.9M [00:03&lt;00:00, 25.2MB/s]"
640
- }
641
- },
642
- "21cc893be48e460ab6e83e0f685249c2": {
643
- "model_module": "@jupyter-widgets/base",
644
- "model_name": "LayoutModel",
645
- "model_module_version": "1.2.0",
646
- "state": {
647
- "_model_module": "@jupyter-widgets/base",
648
- "_model_module_version": "1.2.0",
649
- "_model_name": "LayoutModel",
650
- "_view_count": null,
651
- "_view_module": "@jupyter-widgets/base",
652
- "_view_module_version": "1.2.0",
653
- "_view_name": "LayoutView",
654
- "align_content": null,
655
- "align_items": null,
656
- "align_self": null,
657
- "border": null,
658
- "bottom": null,
659
- "display": null,
660
- "flex": null,
661
- "flex_flow": null,
662
- "grid_area": null,
663
- "grid_auto_columns": null,
664
- "grid_auto_flow": null,
665
- "grid_auto_rows": null,
666
- "grid_column": null,
667
- "grid_gap": null,
668
- "grid_row": null,
669
- "grid_template_areas": null,
670
- "grid_template_columns": null,
671
- "grid_template_rows": null,
672
- "height": null,
673
- "justify_content": null,
674
- "justify_items": null,
675
- "left": null,
676
- "margin": null,
677
- "max_height": null,
678
- "max_width": null,
679
- "min_height": null,
680
- "min_width": null,
681
- "object_fit": null,
682
- "object_position": null,
683
- "order": null,
684
- "overflow": null,
685
- "overflow_x": null,
686
- "overflow_y": null,
687
- "padding": null,
688
- "right": null,
689
- "top": null,
690
- "visibility": null,
691
- "width": null
692
- }
693
- },
694
- "047db71ca65846f1bacf16fbcd9c40f8": {
695
- "model_module": "@jupyter-widgets/base",
696
- "model_name": "LayoutModel",
697
- "model_module_version": "1.2.0",
698
- "state": {
699
- "_model_module": "@jupyter-widgets/base",
700
- "_model_module_version": "1.2.0",
701
- "_model_name": "LayoutModel",
702
- "_view_count": null,
703
- "_view_module": "@jupyter-widgets/base",
704
- "_view_module_version": "1.2.0",
705
- "_view_name": "LayoutView",
706
- "align_content": null,
707
- "align_items": null,
708
- "align_self": null,
709
- "border": null,
710
- "bottom": null,
711
- "display": null,
712
- "flex": null,
713
- "flex_flow": null,
714
- "grid_area": null,
715
- "grid_auto_columns": null,
716
- "grid_auto_flow": null,
717
- "grid_auto_rows": null,
718
- "grid_column": null,
719
- "grid_gap": null,
720
- "grid_row": null,
721
- "grid_template_areas": null,
722
- "grid_template_columns": null,
723
- "grid_template_rows": null,
724
- "height": null,
725
- "justify_content": null,
726
- "justify_items": null,
727
- "left": null,
728
- "margin": null,
729
- "max_height": null,
730
- "max_width": null,
731
- "min_height": null,
732
- "min_width": null,
733
- "object_fit": null,
734
- "object_position": null,
735
- "order": null,
736
- "overflow": null,
737
- "overflow_x": null,
738
- "overflow_y": null,
739
- "padding": null,
740
- "right": null,
741
- "top": null,
742
- "visibility": null,
743
- "width": null
744
- }
745
- },
746
- "3813240ddf704b91ac556c05fe7499f7": {
747
- "model_module": "@jupyter-widgets/controls",
748
- "model_name": "DescriptionStyleModel",
749
- "model_module_version": "1.5.0",
750
- "state": {
751
- "_model_module": "@jupyter-widgets/controls",
752
- "_model_module_version": "1.5.0",
753
- "_model_name": "DescriptionStyleModel",
754
- "_view_count": null,
755
- "_view_module": "@jupyter-widgets/base",
756
- "_view_module_version": "1.2.0",
757
- "_view_name": "StyleView",
758
- "description_width": ""
759
- }
760
- },
761
- "46f47846e9cd4b9b9cf321247fe0e518": {
762
- "model_module": "@jupyter-widgets/base",
763
- "model_name": "LayoutModel",
764
- "model_module_version": "1.2.0",
765
- "state": {
766
- "_model_module": "@jupyter-widgets/base",
767
- "_model_module_version": "1.2.0",
768
- "_model_name": "LayoutModel",
769
- "_view_count": null,
770
- "_view_module": "@jupyter-widgets/base",
771
- "_view_module_version": "1.2.0",
772
- "_view_name": "LayoutView",
773
- "align_content": null,
774
- "align_items": null,
775
- "align_self": null,
776
- "border": null,
777
- "bottom": null,
778
- "display": null,
779
- "flex": null,
780
- "flex_flow": null,
781
- "grid_area": null,
782
- "grid_auto_columns": null,
783
- "grid_auto_flow": null,
784
- "grid_auto_rows": null,
785
- "grid_column": null,
786
- "grid_gap": null,
787
- "grid_row": null,
788
- "grid_template_areas": null,
789
- "grid_template_columns": null,
790
- "grid_template_rows": null,
791
- "height": null,
792
- "justify_content": null,
793
- "justify_items": null,
794
- "left": null,
795
- "margin": null,
796
- "max_height": null,
797
- "max_width": null,
798
- "min_height": null,
799
- "min_width": null,
800
- "object_fit": null,
801
- "object_position": null,
802
- "order": null,
803
- "overflow": null,
804
- "overflow_x": null,
805
- "overflow_y": null,
806
- "padding": null,
807
- "right": null,
808
- "top": null,
809
- "visibility": null,
810
- "width": null
811
- }
812
- },
813
- "922f354c9c5b4c9ebae6ad0e1c87b760": {
814
- "model_module": "@jupyter-widgets/controls",
815
- "model_name": "ProgressStyleModel",
816
- "model_module_version": "1.5.0",
817
- "state": {
818
- "_model_module": "@jupyter-widgets/controls",
819
- "_model_module_version": "1.5.0",
820
- "_model_name": "ProgressStyleModel",
821
- "_view_count": null,
822
- "_view_module": "@jupyter-widgets/base",
823
- "_view_module_version": "1.2.0",
824
- "_view_name": "StyleView",
825
- "bar_color": null,
826
- "description_width": ""
827
- }
828
- },
829
- "c1bdaff966f84b949b22b650311b6528": {
830
- "model_module": "@jupyter-widgets/base",
831
- "model_name": "LayoutModel",
832
- "model_module_version": "1.2.0",
833
- "state": {
834
- "_model_module": "@jupyter-widgets/base",
835
- "_model_module_version": "1.2.0",
836
- "_model_name": "LayoutModel",
837
- "_view_count": null,
838
- "_view_module": "@jupyter-widgets/base",
839
- "_view_module_version": "1.2.0",
840
- "_view_name": "LayoutView",
841
- "align_content": null,
842
- "align_items": null,
843
- "align_self": null,
844
- "border": null,
845
- "bottom": null,
846
- "display": null,
847
- "flex": null,
848
- "flex_flow": null,
849
- "grid_area": null,
850
- "grid_auto_columns": null,
851
- "grid_auto_flow": null,
852
- "grid_auto_rows": null,
853
- "grid_column": null,
854
- "grid_gap": null,
855
- "grid_row": null,
856
- "grid_template_areas": null,
857
- "grid_template_columns": null,
858
- "grid_template_rows": null,
859
- "height": null,
860
- "justify_content": null,
861
- "justify_items": null,
862
- "left": null,
863
- "margin": null,
864
- "max_height": null,
865
- "max_width": null,
866
- "min_height": null,
867
- "min_width": null,
868
- "object_fit": null,
869
- "object_position": null,
870
- "order": null,
871
- "overflow": null,
872
- "overflow_x": null,
873
- "overflow_y": null,
874
- "padding": null,
875
- "right": null,
876
- "top": null,
877
- "visibility": null,
878
- "width": null
879
- }
880
- },
881
- "f16a675994434ddb8b5983faf193b381": {
882
- "model_module": "@jupyter-widgets/controls",
883
- "model_name": "DescriptionStyleModel",
884
- "model_module_version": "1.5.0",
885
- "state": {
886
- "_model_module": "@jupyter-widgets/controls",
887
- "_model_module_version": "1.5.0",
888
- "_model_name": "DescriptionStyleModel",
889
- "_view_count": null,
890
- "_view_module": "@jupyter-widgets/base",
891
- "_view_module_version": "1.2.0",
892
- "_view_name": "StyleView",
893
- "description_width": ""
894
- }
895
- },
896
- "79e90f6090ee493890acbf26292dd997": {
897
- "model_module": "@jupyter-widgets/controls",
898
- "model_name": "HBoxModel",
899
- "model_module_version": "1.5.0",
900
- "state": {
901
- "_dom_classes": [],
902
- "_model_module": "@jupyter-widgets/controls",
903
- "_model_module_version": "1.5.0",
904
- "_model_name": "HBoxModel",
905
- "_view_count": null,
906
- "_view_module": "@jupyter-widgets/controls",
907
- "_view_module_version": "1.5.0",
908
- "_view_name": "HBoxView",
909
- "box_style": "",
910
- "children": [
911
- "IPY_MODEL_d245bcee114a4216bbfc6a1999daca08",
912
- "IPY_MODEL_e2acb9e61cec4083b3113415a205df12",
913
- "IPY_MODEL_08a63fea880840a2a472008d98bb03d2"
914
- ],
915
- "layout": "IPY_MODEL_004d4e339ec840c8bb748f4fdcef663a"
916
- }
917
- },
918
- "d245bcee114a4216bbfc6a1999daca08": {
919
- "model_module": "@jupyter-widgets/controls",
920
- "model_name": "HTMLModel",
921
- "model_module_version": "1.5.0",
922
- "state": {
923
- "_dom_classes": [],
924
- "_model_module": "@jupyter-widgets/controls",
925
- "_model_module_version": "1.5.0",
926
- "_model_name": "HTMLModel",
927
- "_view_count": null,
928
- "_view_module": "@jupyter-widgets/controls",
929
- "_view_module_version": "1.5.0",
930
- "_view_name": "HTMLView",
931
- "description": "",
932
- "description_tooltip": null,
933
- "layout": "IPY_MODEL_3f9668c6708247f4b65ae9204b5df832",
934
- "placeholder": "​",
935
- "style": "IPY_MODEL_b1189ac61af44228a2e52026e73b0d2e",
936
- "value": "samples_mmlu_all_2024-12-05T10-28-27.223863.jsonl: 100%"
937
- }
938
- },
939
- "e2acb9e61cec4083b3113415a205df12": {
940
- "model_module": "@jupyter-widgets/controls",
941
- "model_name": "FloatProgressModel",
942
- "model_module_version": "1.5.0",
943
- "state": {
944
- "_dom_classes": [],
945
- "_model_module": "@jupyter-widgets/controls",
946
- "_model_module_version": "1.5.0",
947
- "_model_name": "FloatProgressModel",
948
- "_view_count": null,
949
- "_view_module": "@jupyter-widgets/controls",
950
- "_view_module_version": "1.5.0",
951
- "_view_name": "ProgressView",
952
- "bar_style": "success",
953
- "description": "",
954
- "description_tooltip": null,
955
- "layout": "IPY_MODEL_8c89d406ae534cf0a7ef99eba4263b2b",
956
- "max": 89904912,
957
- "min": 0,
958
- "orientation": "horizontal",
959
- "style": "IPY_MODEL_8adaf7159dcf4b318c2fc5930dd8633d",
960
- "value": 89904912
961
- }
962
- },
963
- "08a63fea880840a2a472008d98bb03d2": {
964
- "model_module": "@jupyter-widgets/controls",
965
- "model_name": "HTMLModel",
966
- "model_module_version": "1.5.0",
967
- "state": {
968
- "_dom_classes": [],
969
- "_model_module": "@jupyter-widgets/controls",
970
- "_model_module_version": "1.5.0",
971
- "_model_name": "HTMLModel",
972
- "_view_count": null,
973
- "_view_module": "@jupyter-widgets/controls",
974
- "_view_module_version": "1.5.0",
975
- "_view_name": "HTMLView",
976
- "description": "",
977
- "description_tooltip": null,
978
- "layout": "IPY_MODEL_24bd51fe6e81460f9a9c99a1961a4107",
979
- "placeholder": "​",
980
- "style": "IPY_MODEL_2b0ffc94bfca468a89f0799cbef29ee5",
981
- "value": " 89.9M/89.9M [00:04&lt;00:00, 17.7MB/s]"
982
- }
983
- },
984
- "004d4e339ec840c8bb748f4fdcef663a": {
985
- "model_module": "@jupyter-widgets/base",
986
- "model_name": "LayoutModel",
987
- "model_module_version": "1.2.0",
988
- "state": {
989
- "_model_module": "@jupyter-widgets/base",
990
- "_model_module_version": "1.2.0",
991
- "_model_name": "LayoutModel",
992
- "_view_count": null,
993
- "_view_module": "@jupyter-widgets/base",
994
- "_view_module_version": "1.2.0",
995
- "_view_name": "LayoutView",
996
- "align_content": null,
997
- "align_items": null,
998
- "align_self": null,
999
- "border": null,
1000
- "bottom": null,
1001
- "display": null,
1002
- "flex": null,
1003
- "flex_flow": null,
1004
- "grid_area": null,
1005
- "grid_auto_columns": null,
1006
- "grid_auto_flow": null,
1007
- "grid_auto_rows": null,
1008
- "grid_column": null,
1009
- "grid_gap": null,
1010
- "grid_row": null,
1011
- "grid_template_areas": null,
1012
- "grid_template_columns": null,
1013
- "grid_template_rows": null,
1014
- "height": null,
1015
- "justify_content": null,
1016
- "justify_items": null,
1017
- "left": null,
1018
- "margin": null,
1019
- "max_height": null,
1020
- "max_width": null,
1021
- "min_height": null,
1022
- "min_width": null,
1023
- "object_fit": null,
1024
- "object_position": null,
1025
- "order": null,
1026
- "overflow": null,
1027
- "overflow_x": null,
1028
- "overflow_y": null,
1029
- "padding": null,
1030
- "right": null,
1031
- "top": null,
1032
- "visibility": null,
1033
- "width": null
1034
- }
1035
- },
1036
- "3f9668c6708247f4b65ae9204b5df832": {
1037
- "model_module": "@jupyter-widgets/base",
1038
- "model_name": "LayoutModel",
1039
- "model_module_version": "1.2.0",
1040
- "state": {
1041
- "_model_module": "@jupyter-widgets/base",
1042
- "_model_module_version": "1.2.0",
1043
- "_model_name": "LayoutModel",
1044
- "_view_count": null,
1045
- "_view_module": "@jupyter-widgets/base",
1046
- "_view_module_version": "1.2.0",
1047
- "_view_name": "LayoutView",
1048
- "align_content": null,
1049
- "align_items": null,
1050
- "align_self": null,
1051
- "border": null,
1052
- "bottom": null,
1053
- "display": null,
1054
- "flex": null,
1055
- "flex_flow": null,
1056
- "grid_area": null,
1057
- "grid_auto_columns": null,
1058
- "grid_auto_flow": null,
1059
- "grid_auto_rows": null,
1060
- "grid_column": null,
1061
- "grid_gap": null,
1062
- "grid_row": null,
1063
- "grid_template_areas": null,
1064
- "grid_template_columns": null,
1065
- "grid_template_rows": null,
1066
- "height": null,
1067
- "justify_content": null,
1068
- "justify_items": null,
1069
- "left": null,
1070
- "margin": null,
1071
- "max_height": null,
1072
- "max_width": null,
1073
- "min_height": null,
1074
- "min_width": null,
1075
- "object_fit": null,
1076
- "object_position": null,
1077
- "order": null,
1078
- "overflow": null,
1079
- "overflow_x": null,
1080
- "overflow_y": null,
1081
- "padding": null,
1082
- "right": null,
1083
- "top": null,
1084
- "visibility": null,
1085
- "width": null
1086
- }
1087
- },
1088
- "b1189ac61af44228a2e52026e73b0d2e": {
1089
- "model_module": "@jupyter-widgets/controls",
1090
- "model_name": "DescriptionStyleModel",
1091
- "model_module_version": "1.5.0",
1092
- "state": {
1093
- "_model_module": "@jupyter-widgets/controls",
1094
- "_model_module_version": "1.5.0",
1095
- "_model_name": "DescriptionStyleModel",
1096
- "_view_count": null,
1097
- "_view_module": "@jupyter-widgets/base",
1098
- "_view_module_version": "1.2.0",
1099
- "_view_name": "StyleView",
1100
- "description_width": ""
1101
- }
1102
- },
1103
- "8c89d406ae534cf0a7ef99eba4263b2b": {
1104
- "model_module": "@jupyter-widgets/base",
1105
- "model_name": "LayoutModel",
1106
- "model_module_version": "1.2.0",
1107
- "state": {
1108
- "_model_module": "@jupyter-widgets/base",
1109
- "_model_module_version": "1.2.0",
1110
- "_model_name": "LayoutModel",
1111
- "_view_count": null,
1112
- "_view_module": "@jupyter-widgets/base",
1113
- "_view_module_version": "1.2.0",
1114
- "_view_name": "LayoutView",
1115
- "align_content": null,
1116
- "align_items": null,
1117
- "align_self": null,
1118
- "border": null,
1119
- "bottom": null,
1120
- "display": null,
1121
- "flex": null,
1122
- "flex_flow": null,
1123
- "grid_area": null,
1124
- "grid_auto_columns": null,
1125
- "grid_auto_flow": null,
1126
- "grid_auto_rows": null,
1127
- "grid_column": null,
1128
- "grid_gap": null,
1129
- "grid_row": null,
1130
- "grid_template_areas": null,
1131
- "grid_template_columns": null,
1132
- "grid_template_rows": null,
1133
- "height": null,
1134
- "justify_content": null,
1135
- "justify_items": null,
1136
- "left": null,
1137
- "margin": null,
1138
- "max_height": null,
1139
- "max_width": null,
1140
- "min_height": null,
1141
- "min_width": null,
1142
- "object_fit": null,
1143
- "object_position": null,
1144
- "order": null,
1145
- "overflow": null,
1146
- "overflow_x": null,
1147
- "overflow_y": null,
1148
- "padding": null,
1149
- "right": null,
1150
- "top": null,
1151
- "visibility": null,
1152
- "width": null
1153
- }
1154
- },
1155
- "8adaf7159dcf4b318c2fc5930dd8633d": {
1156
- "model_module": "@jupyter-widgets/controls",
1157
- "model_name": "ProgressStyleModel",
1158
- "model_module_version": "1.5.0",
1159
- "state": {
1160
- "_model_module": "@jupyter-widgets/controls",
1161
- "_model_module_version": "1.5.0",
1162
- "_model_name": "ProgressStyleModel",
1163
- "_view_count": null,
1164
- "_view_module": "@jupyter-widgets/base",
1165
- "_view_module_version": "1.2.0",
1166
- "_view_name": "StyleView",
1167
- "bar_color": null,
1168
- "description_width": ""
1169
- }
1170
- },
1171
- "24bd51fe6e81460f9a9c99a1961a4107": {
1172
- "model_module": "@jupyter-widgets/base",
1173
- "model_name": "LayoutModel",
1174
- "model_module_version": "1.2.0",
1175
- "state": {
1176
- "_model_module": "@jupyter-widgets/base",
1177
- "_model_module_version": "1.2.0",
1178
- "_model_name": "LayoutModel",
1179
- "_view_count": null,
1180
- "_view_module": "@jupyter-widgets/base",
1181
- "_view_module_version": "1.2.0",
1182
- "_view_name": "LayoutView",
1183
- "align_content": null,
1184
- "align_items": null,
1185
- "align_self": null,
1186
- "border": null,
1187
- "bottom": null,
1188
- "display": null,
1189
- "flex": null,
1190
- "flex_flow": null,
1191
- "grid_area": null,
1192
- "grid_auto_columns": null,
1193
- "grid_auto_flow": null,
1194
- "grid_auto_rows": null,
1195
- "grid_column": null,
1196
- "grid_gap": null,
1197
- "grid_row": null,
1198
- "grid_template_areas": null,
1199
- "grid_template_columns": null,
1200
- "grid_template_rows": null,
1201
- "height": null,
1202
- "justify_content": null,
1203
- "justify_items": null,
1204
- "left": null,
1205
- "margin": null,
1206
- "max_height": null,
1207
- "max_width": null,
1208
- "min_height": null,
1209
- "min_width": null,
1210
- "object_fit": null,
1211
- "object_position": null,
1212
- "order": null,
1213
- "overflow": null,
1214
- "overflow_x": null,
1215
- "overflow_y": null,
1216
- "padding": null,
1217
- "right": null,
1218
- "top": null,
1219
- "visibility": null,
1220
- "width": null
1221
- }
1222
- },
1223
- "2b0ffc94bfca468a89f0799cbef29ee5": {
1224
- "model_module": "@jupyter-widgets/controls",
1225
- "model_name": "DescriptionStyleModel",
1226
- "model_module_version": "1.5.0",
1227
- "state": {
1228
- "_model_module": "@jupyter-widgets/controls",
1229
- "_model_module_version": "1.5.0",
1230
- "_model_name": "DescriptionStyleModel",
1231
- "_view_count": null,
1232
- "_view_module": "@jupyter-widgets/base",
1233
- "_view_module_version": "1.2.0",
1234
- "_view_name": "StyleView",
1235
- "description_width": ""
1236
- }
1237
- },
1238
- "c2b881fabd4f4758bd9aca516a579b4c": {
1239
- "model_module": "@jupyter-widgets/controls",
1240
- "model_name": "HBoxModel",
1241
- "model_module_version": "1.5.0",
1242
- "state": {
1243
- "_dom_classes": [],
1244
- "_model_module": "@jupyter-widgets/controls",
1245
- "_model_module_version": "1.5.0",
1246
- "_model_name": "HBoxModel",
1247
- "_view_count": null,
1248
- "_view_module": "@jupyter-widgets/controls",
1249
- "_view_module_version": "1.5.0",
1250
- "_view_name": "HBoxView",
1251
- "box_style": "",
1252
- "children": [
1253
- "IPY_MODEL_6ab81736ee0643c7ae5e20978914fef4",
1254
- "IPY_MODEL_84ee9c147755422fb74229a2817ad35a",
1255
- "IPY_MODEL_35025439e8e545d28ee9f3c51df4f1bb"
1256
- ],
1257
- "layout": "IPY_MODEL_da989b5fe95449f6928355cd317d7f4a"
1258
- }
1259
- },
1260
- "6ab81736ee0643c7ae5e20978914fef4": {
1261
- "model_module": "@jupyter-widgets/controls",
1262
- "model_name": "HTMLModel",
1263
- "model_module_version": "1.5.0",
1264
- "state": {
1265
- "_dom_classes": [],
1266
- "_model_module": "@jupyter-widgets/controls",
1267
- "_model_module_version": "1.5.0",
1268
- "_model_name": "HTMLModel",
1269
- "_view_count": null,
1270
- "_view_module": "@jupyter-widgets/controls",
1271
- "_view_module_version": "1.5.0",
1272
- "_view_name": "HTMLView",
1273
- "description": "",
1274
- "description_tooltip": null,
1275
- "layout": "IPY_MODEL_ed8214d39c2e4e47a7b32e1152597642",
1276
- "placeholder": "​",
1277
- "style": "IPY_MODEL_6210d27773144f78aed58c4ad9aaa0a2",
1278
- "value": "samples_mmlu_all_2024-12-05T11-08-04.943164.jsonl: 100%"
1279
- }
1280
- },
1281
- "84ee9c147755422fb74229a2817ad35a": {
1282
- "model_module": "@jupyter-widgets/controls",
1283
- "model_name": "FloatProgressModel",
1284
- "model_module_version": "1.5.0",
1285
- "state": {
1286
- "_dom_classes": [],
1287
- "_model_module": "@jupyter-widgets/controls",
1288
- "_model_module_version": "1.5.0",
1289
- "_model_name": "FloatProgressModel",
1290
- "_view_count": null,
1291
- "_view_module": "@jupyter-widgets/controls",
1292
- "_view_module_version": "1.5.0",
1293
- "_view_name": "ProgressView",
1294
- "bar_style": "success",
1295
- "description": "",
1296
- "description_tooltip": null,
1297
- "layout": "IPY_MODEL_0e65b64cc85c4b52aa48c11f65ccceeb",
1298
- "max": 89914374,
1299
- "min": 0,
1300
- "orientation": "horizontal",
1301
- "style": "IPY_MODEL_85504ad2f7be45e1a828b5077905dad5",
1302
- "value": 89914374
1303
- }
1304
- },
1305
- "35025439e8e545d28ee9f3c51df4f1bb": {
1306
- "model_module": "@jupyter-widgets/controls",
1307
- "model_name": "HTMLModel",
1308
- "model_module_version": "1.5.0",
1309
- "state": {
1310
- "_dom_classes": [],
1311
- "_model_module": "@jupyter-widgets/controls",
1312
- "_model_module_version": "1.5.0",
1313
- "_model_name": "HTMLModel",
1314
- "_view_count": null,
1315
- "_view_module": "@jupyter-widgets/controls",
1316
- "_view_module_version": "1.5.0",
1317
- "_view_name": "HTMLView",
1318
- "description": "",
1319
- "description_tooltip": null,
1320
- "layout": "IPY_MODEL_a638974916d74c239e287002bc1d089a",
1321
- "placeholder": "​",
1322
- "style": "IPY_MODEL_a6fcb58400a54716bda1e4dc511add95",
1323
- "value": " 89.9M/89.9M [00:10&lt;00:00, 3.64MB/s]"
1324
- }
1325
- },
1326
- "da989b5fe95449f6928355cd317d7f4a": {
1327
- "model_module": "@jupyter-widgets/base",
1328
- "model_name": "LayoutModel",
1329
- "model_module_version": "1.2.0",
1330
- "state": {
1331
- "_model_module": "@jupyter-widgets/base",
1332
- "_model_module_version": "1.2.0",
1333
- "_model_name": "LayoutModel",
1334
- "_view_count": null,
1335
- "_view_module": "@jupyter-widgets/base",
1336
- "_view_module_version": "1.2.0",
1337
- "_view_name": "LayoutView",
1338
- "align_content": null,
1339
- "align_items": null,
1340
- "align_self": null,
1341
- "border": null,
1342
- "bottom": null,
1343
- "display": null,
1344
- "flex": null,
1345
- "flex_flow": null,
1346
- "grid_area": null,
1347
- "grid_auto_columns": null,
1348
- "grid_auto_flow": null,
1349
- "grid_auto_rows": null,
1350
- "grid_column": null,
1351
- "grid_gap": null,
1352
- "grid_row": null,
1353
- "grid_template_areas": null,
1354
- "grid_template_columns": null,
1355
- "grid_template_rows": null,
1356
- "height": null,
1357
- "justify_content": null,
1358
- "justify_items": null,
1359
- "left": null,
1360
- "margin": null,
1361
- "max_height": null,
1362
- "max_width": null,
1363
- "min_height": null,
1364
- "min_width": null,
1365
- "object_fit": null,
1366
- "object_position": null,
1367
- "order": null,
1368
- "overflow": null,
1369
- "overflow_x": null,
1370
- "overflow_y": null,
1371
- "padding": null,
1372
- "right": null,
1373
- "top": null,
1374
- "visibility": null,
1375
- "width": null
1376
- }
1377
- },
1378
- "ed8214d39c2e4e47a7b32e1152597642": {
1379
- "model_module": "@jupyter-widgets/base",
1380
- "model_name": "LayoutModel",
1381
- "model_module_version": "1.2.0",
1382
- "state": {
1383
- "_model_module": "@jupyter-widgets/base",
1384
- "_model_module_version": "1.2.0",
1385
- "_model_name": "LayoutModel",
1386
- "_view_count": null,
1387
- "_view_module": "@jupyter-widgets/base",
1388
- "_view_module_version": "1.2.0",
1389
- "_view_name": "LayoutView",
1390
- "align_content": null,
1391
- "align_items": null,
1392
- "align_self": null,
1393
- "border": null,
1394
- "bottom": null,
1395
- "display": null,
1396
- "flex": null,
1397
- "flex_flow": null,
1398
- "grid_area": null,
1399
- "grid_auto_columns": null,
1400
- "grid_auto_flow": null,
1401
- "grid_auto_rows": null,
1402
- "grid_column": null,
1403
- "grid_gap": null,
1404
- "grid_row": null,
1405
- "grid_template_areas": null,
1406
- "grid_template_columns": null,
1407
- "grid_template_rows": null,
1408
- "height": null,
1409
- "justify_content": null,
1410
- "justify_items": null,
1411
- "left": null,
1412
- "margin": null,
1413
- "max_height": null,
1414
- "max_width": null,
1415
- "min_height": null,
1416
- "min_width": null,
1417
- "object_fit": null,
1418
- "object_position": null,
1419
- "order": null,
1420
- "overflow": null,
1421
- "overflow_x": null,
1422
- "overflow_y": null,
1423
- "padding": null,
1424
- "right": null,
1425
- "top": null,
1426
- "visibility": null,
1427
- "width": null
1428
- }
1429
- },
1430
- "6210d27773144f78aed58c4ad9aaa0a2": {
1431
- "model_module": "@jupyter-widgets/controls",
1432
- "model_name": "DescriptionStyleModel",
1433
- "model_module_version": "1.5.0",
1434
- "state": {
1435
- "_model_module": "@jupyter-widgets/controls",
1436
- "_model_module_version": "1.5.0",
1437
- "_model_name": "DescriptionStyleModel",
1438
- "_view_count": null,
1439
- "_view_module": "@jupyter-widgets/base",
1440
- "_view_module_version": "1.2.0",
1441
- "_view_name": "StyleView",
1442
- "description_width": ""
1443
- }
1444
- },
1445
- "0e65b64cc85c4b52aa48c11f65ccceeb": {
1446
- "model_module": "@jupyter-widgets/base",
1447
- "model_name": "LayoutModel",
1448
- "model_module_version": "1.2.0",
1449
- "state": {
1450
- "_model_module": "@jupyter-widgets/base",
1451
- "_model_module_version": "1.2.0",
1452
- "_model_name": "LayoutModel",
1453
- "_view_count": null,
1454
- "_view_module": "@jupyter-widgets/base",
1455
- "_view_module_version": "1.2.0",
1456
- "_view_name": "LayoutView",
1457
- "align_content": null,
1458
- "align_items": null,
1459
- "align_self": null,
1460
- "border": null,
1461
- "bottom": null,
1462
- "display": null,
1463
- "flex": null,
1464
- "flex_flow": null,
1465
- "grid_area": null,
1466
- "grid_auto_columns": null,
1467
- "grid_auto_flow": null,
1468
- "grid_auto_rows": null,
1469
- "grid_column": null,
1470
- "grid_gap": null,
1471
- "grid_row": null,
1472
- "grid_template_areas": null,
1473
- "grid_template_columns": null,
1474
- "grid_template_rows": null,
1475
- "height": null,
1476
- "justify_content": null,
1477
- "justify_items": null,
1478
- "left": null,
1479
- "margin": null,
1480
- "max_height": null,
1481
- "max_width": null,
1482
- "min_height": null,
1483
- "min_width": null,
1484
- "object_fit": null,
1485
- "object_position": null,
1486
- "order": null,
1487
- "overflow": null,
1488
- "overflow_x": null,
1489
- "overflow_y": null,
1490
- "padding": null,
1491
- "right": null,
1492
- "top": null,
1493
- "visibility": null,
1494
- "width": null
1495
- }
1496
- },
1497
- "85504ad2f7be45e1a828b5077905dad5": {
1498
- "model_module": "@jupyter-widgets/controls",
1499
- "model_name": "ProgressStyleModel",
1500
- "model_module_version": "1.5.0",
1501
- "state": {
1502
- "_model_module": "@jupyter-widgets/controls",
1503
- "_model_module_version": "1.5.0",
1504
- "_model_name": "ProgressStyleModel",
1505
- "_view_count": null,
1506
- "_view_module": "@jupyter-widgets/base",
1507
- "_view_module_version": "1.2.0",
1508
- "_view_name": "StyleView",
1509
- "bar_color": null,
1510
- "description_width": ""
1511
- }
1512
- },
1513
- "a638974916d74c239e287002bc1d089a": {
1514
- "model_module": "@jupyter-widgets/base",
1515
- "model_name": "LayoutModel",
1516
- "model_module_version": "1.2.0",
1517
- "state": {
1518
- "_model_module": "@jupyter-widgets/base",
1519
- "_model_module_version": "1.2.0",
1520
- "_model_name": "LayoutModel",
1521
- "_view_count": null,
1522
- "_view_module": "@jupyter-widgets/base",
1523
- "_view_module_version": "1.2.0",
1524
- "_view_name": "LayoutView",
1525
- "align_content": null,
1526
- "align_items": null,
1527
- "align_self": null,
1528
- "border": null,
1529
- "bottom": null,
1530
- "display": null,
1531
- "flex": null,
1532
- "flex_flow": null,
1533
- "grid_area": null,
1534
- "grid_auto_columns": null,
1535
- "grid_auto_flow": null,
1536
- "grid_auto_rows": null,
1537
- "grid_column": null,
1538
- "grid_gap": null,
1539
- "grid_row": null,
1540
- "grid_template_areas": null,
1541
- "grid_template_columns": null,
1542
- "grid_template_rows": null,
1543
- "height": null,
1544
- "justify_content": null,
1545
- "justify_items": null,
1546
- "left": null,
1547
- "margin": null,
1548
- "max_height": null,
1549
- "max_width": null,
1550
- "min_height": null,
1551
- "min_width": null,
1552
- "object_fit": null,
1553
- "object_position": null,
1554
- "order": null,
1555
- "overflow": null,
1556
- "overflow_x": null,
1557
- "overflow_y": null,
1558
- "padding": null,
1559
- "right": null,
1560
- "top": null,
1561
- "visibility": null,
1562
- "width": null
1563
- }
1564
- },
1565
- "a6fcb58400a54716bda1e4dc511add95": {
1566
- "model_module": "@jupyter-widgets/controls",
1567
- "model_name": "DescriptionStyleModel",
1568
- "model_module_version": "1.5.0",
1569
- "state": {
1570
- "_model_module": "@jupyter-widgets/controls",
1571
- "_model_module_version": "1.5.0",
1572
- "_model_name": "DescriptionStyleModel",
1573
- "_view_count": null,
1574
- "_view_module": "@jupyter-widgets/base",
1575
- "_view_module_version": "1.2.0",
1576
- "_view_name": "StyleView",
1577
- "description_width": ""
1578
- }
1579
- }
1580
- }
1581
  }
1582
  },
1583
  "nbformat": 4,
 
13
  "cell_type": "code",
14
  "execution_count": null,
15
  "metadata": {
16
+ "id": "kGW7vfRkrqHe"
 
 
 
 
17
  },
18
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
19
  "source": [
20
  "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
21
  ]
 
132
  "cell_type": "code",
133
  "execution_count": null,
134
  "metadata": {
135
+ "id": "uMoitxJkHerH"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
136
  },
137
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
138
  "source": [
139
  "hf_upload_folder(BASE_FOLDER)"
140
  ]
 
159
  "cell_type": "code",
160
  "execution_count": null,
161
  "metadata": {
162
+ "id": "eIUOqu5sHfkM"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
  },
164
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  "source": [
166
  "hf_upload_folder(BASE_FOLDER)"
167
  ]
 
186
  "cell_type": "code",
187
  "execution_count": null,
188
  "metadata": {
189
+ "id": "xsL82Q4SHgMn"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
190
  },
191
+ "outputs": [],
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
192
  "source": [
193
  "hf_upload_folder(BASE_FOLDER)"
194
  ]
 
207
  "execution_count": null,
208
  "metadata": {
209
  "collapsed": true,
210
+ "id": "ilu9_ulWTy3p"
 
 
 
 
211
  },
212
+ "outputs": [],
 
 
 
 
 
 
 
 
213
  "source": [
214
  "!lm_eval \\\n",
215
  "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
 
223
  "cell_type": "code",
224
  "execution_count": null,
225
  "metadata": {
226
+ "id": "jE5r8gVDHhAz"
 
 
 
 
227
  },
228
+ "outputs": [],
 
 
 
 
 
 
 
 
 
229
  "source": [
230
  "hf_upload_folder(BASE_FOLDER)"
231
  ]
 
244
  },
245
  "language_info": {
246
  "name": "python"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
247
  }
248
  },
249
  "nbformat": 4,
llm_metaeval_eval_harness_results.ipynb CHANGED
The diff for this file is too large to render. See raw diff