flunardelli committed on
Commit
8e20010
·
verified ·
1 Parent(s): b8ee329

Update llm_metaeval_eval_harness_mmlu.ipynb

Browse files
Files changed (1) hide show
  1. llm_metaeval_eval_harness_mmlu.ipynb +148 -119
llm_metaeval_eval_harness_mmlu.ipynb CHANGED
@@ -1,70 +1,90 @@
1
  {
2
- "nbformat": 4,
3
- "nbformat_minor": 0,
4
- "metadata": {
5
- "colab": {
6
- "provenance": [],
7
- "gpuType": "T4"
8
- },
9
- "kernelspec": {
10
- "name": "python3",
11
- "display_name": "Python 3"
12
- },
13
- "language_info": {
14
- "name": "python"
15
- },
16
- "accelerator": "GPU"
17
- },
18
  "cells": [
19
  {
20
  "cell_type": "markdown",
21
- "source": [
22
- "Initial setup"
23
- ],
24
  "metadata": {
25
  "id": "U8RTc2PmnX-v"
26
- }
 
 
 
27
  },
28
  {
29
  "cell_type": "code",
30
- "source": [
31
- "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
32
- ],
33
  "metadata": {
34
  "id": "kGW7vfRkrqHe"
35
  },
36
- "execution_count": null,
37
- "outputs": []
 
 
38
  },
39
  {
40
  "cell_type": "code",
41
- "source": [
42
- "from huggingface_hub import notebook_login\n",
43
- "notebook_login()"
44
- ],
45
  "metadata": {
46
  "id": "2I850FIsCVNw"
47
  },
48
- "execution_count": null,
49
- "outputs": []
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
50
  },
51
  {
52
  "cell_type": "markdown",
53
- "source": [
54
- "Create task for MMLU all datasets"
55
- ],
56
  "metadata": {
57
  "id": "Jd2JwKZaPkNS"
58
- }
 
 
 
59
  },
60
  {
61
  "cell_type": "code",
 
 
 
 
 
62
  "source": [
63
  "YAML_mmlu_en_us_string = \"\"\"\n",
64
  "task: mmlu_all\n",
65
  "dataset_path: cais/mmlu\n",
66
  "dataset_name: all\n",
67
- "description: \"MMLU dataset in English\"\n",
68
  "test_split: test\n",
69
  "fewshot_split: dev\n",
70
  "fewshot_config:\n",
@@ -81,134 +101,143 @@
81
  " aggregation: mean\n",
82
  " higher_is_better: true\n",
83
  "\"\"\"\n",
84
- "with open(\"mmlu_en_us.yaml\", \"w\") as f:\n",
85
- " f.write(YAML_mmlu_en_us_string)"
86
- ],
87
- "metadata": {
88
- "id": "xP0cC_sHih7C"
89
- },
90
- "execution_count": null,
91
- "outputs": []
92
  },
93
  {
94
  "cell_type": "markdown",
95
- "source": [
96
- "Llama Models"
97
- ],
98
  "metadata": {
99
  "id": "mJjo_A5tP-Td"
100
- }
 
 
 
101
  },
102
  {
103
  "cell_type": "code",
104
- "source": [
105
- "!lm_eval --model hf \\\n",
106
- " --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct \\\n",
107
- " --include_path ./ \\\n",
108
- " --tasks mmlu_all \\\n",
109
- " --output output/mmlu/ \\\n",
110
- " --use_cache cache \\\n",
111
- " --device cuda:0 \\\n",
112
- " --log_samples\n",
113
- " # --limit 10\n"
114
- ],
115
  "metadata": {
116
  "id": "IzP5nyP0Gwk8"
117
  },
118
- "execution_count": null,
119
- "outputs": []
 
 
 
 
 
 
 
120
  },
121
  {
122
  "cell_type": "code",
123
- "source": [
124
- "!lm_eval --model hf \\\n",
125
- " --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct \\\n",
126
- " --include_path ./ \\\n",
127
- " --tasks mmlu_all \\\n",
128
- " --output output/mmlu/ \\\n",
129
- " --use_cache cache \\\n",
130
- " --device cuda:0 \\\n",
131
- " --log_samples\n",
132
- " # --limit 10"
133
- ],
134
  "metadata": {
135
  "id": "oIACOAhDW5ow"
136
  },
137
- "execution_count": null,
138
- "outputs": []
 
 
 
 
 
 
 
139
  },
140
  {
141
  "cell_type": "code",
 
 
 
 
 
142
  "source": [
143
- "!lm_eval --model hf \\\n",
144
- " --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1 \\\n",
145
- " --include_path ./ \\\n",
146
- " --tasks mmlu_all \\\n",
147
- " --output output/mmlu/ \\\n",
148
- " --use_cache cache \\\n",
149
- " --device cuda:0 \\\n",
150
- " --log_samples\n",
151
- " # --limit 10"
152
- ],
153
  "metadata": {
154
- "id": "1Nxw4WNxZUyb"
155
  },
156
- "execution_count": null,
157
- "outputs": []
 
158
  },
159
  {
160
  "cell_type": "code",
161
  "source": [
162
- "!lm_eval --model hf \\\n",
163
- " --model_args pretrained=meta-llama/Meta-Llama-3-8B \\\n",
164
- " --include_path ./ \\\n",
165
- " --tasks mmlu_all \\\n",
166
- " --output output/mmlu/ \\\n",
167
- " --use_cache cache \\\n",
168
- " --device cuda:0 \\\n",
169
- " --log_samples\n",
170
- " # --limit 10"
171
  ],
172
  "metadata": {
173
- "id": "cFFYPzBIYGf7"
174
  },
175
  "execution_count": null,
176
  "outputs": []
177
  },
178
  {
179
- "cell_type": "markdown",
 
 
 
 
 
180
  "source": [
181
- "Mistral Models"
182
- ],
 
 
 
 
 
 
 
 
183
  "metadata": {
184
- "id": "1fEX-49hQ-Be"
185
- }
 
 
 
186
  },
187
  {
188
  "cell_type": "code",
189
  "source": [
190
- "!lm_eval --model hf \\\n",
191
- " --model_args pretrained=mistralai/Mistral-7B-v0.1 \\\n",
192
- " --include_path ./ \\\n",
193
- " --tasks mmlu_all \\\n",
194
- " --output output/mmlu/ \\\n",
195
- " --use_cache cache \\\n",
196
- " --device cuda:0 \\\n",
197
- " --log_samples\n",
198
- " # --limit 10"
199
  ],
200
  "metadata": {
201
- "id": "3cHI2qxN2fJ0"
202
  },
203
  "execution_count": null,
204
  "outputs": []
 
 
 
 
 
 
 
 
205
  },
206
- {
207
- "cell_type": "markdown",
208
- "source": [],
209
- "metadata": {
210
- "id": "ZUTPHnV0kMB1"
211
- }
212
  }
213
- ]
 
 
214
  }
 
1
  {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2
  "cells": [
3
  {
4
  "cell_type": "markdown",
 
 
 
5
  "metadata": {
6
  "id": "U8RTc2PmnX-v"
7
+ },
8
+ "source": [
9
+ "Initial setup"
10
+ ]
11
  },
12
  {
13
  "cell_type": "code",
14
+ "execution_count": null,
 
 
15
  "metadata": {
16
  "id": "kGW7vfRkrqHe"
17
  },
18
+ "outputs": [],
19
+ "source": [
20
+ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
21
+ ]
22
  },
23
  {
24
  "cell_type": "code",
25
+ "execution_count": null,
 
 
 
26
  "metadata": {
27
  "id": "2I850FIsCVNw"
28
  },
29
+ "outputs": [],
30
+ "source": [
31
+ "from datetime import datetime\n",
32
+ "import os\n",
33
+ "from huggingface_hub import login, upload_folder\n",
34
+ "from google.colab import userdata\n",
35
+ "import shutil\n",
36
+ "\n",
37
+ "HF_TOKEN = userdata.get('HUGGING_FACE_WRITE_TOKEN')\n",
38
+ "login(HF_TOKEN, True)\n",
39
+ "BASE_DATASET='mmlu'\n",
40
+ "REPO_ID='flunardelli/llm-metaeval'\n",
41
+ "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
42
+ "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
43
+ "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
44
+ "#shutil.rmtree(BASE_FOLDER)\n",
45
+ "os.makedirs(OUTPUT_FOLDER)\n",
46
+ "os.makedirs(TASK_FOLDER)\n",
47
+ "os.environ['HF_TOKEN'] = HF_TOKEN\n",
48
+ "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
49
+ "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
50
+ "\n",
51
+ "def hf_upload_folder(folder_path):\n",
52
+ " upload_folder(\n",
53
+ " folder_path=folder_path,\n",
54
+ " path_in_repo=\"evals/\",\n",
55
+ " repo_id=REPO_ID,\n",
56
+ " token=HF_TOKEN,\n",
57
+ " repo_type=\"dataset\"\n",
58
+ " )\n",
59
+ "\n",
60
+ "def create_task(content, filename):\n",
61
+ " filename_path = os.path.join(TASK_FOLDER,filename)\n",
62
+ " with open(filename_path, \"w\") as f:\n",
63
+ " f.write(content)"
64
+ ]
65
  },
66
  {
67
  "cell_type": "markdown",
 
 
 
68
  "metadata": {
69
  "id": "Jd2JwKZaPkNS"
70
+ },
71
+ "source": [
72
+ "Create task for MMLU all datasets"
73
+ ]
74
  },
75
  {
76
  "cell_type": "code",
77
+ "execution_count": null,
78
+ "metadata": {
79
+ "id": "xP0cC_sHih7C"
80
+ },
81
+ "outputs": [],
82
  "source": [
83
  "YAML_mmlu_en_us_string = \"\"\"\n",
84
  "task: mmlu_all\n",
85
  "dataset_path: cais/mmlu\n",
86
  "dataset_name: all\n",
87
+ "description: \"MMLU dataset\"\n",
88
  "test_split: test\n",
89
  "fewshot_split: dev\n",
90
  "fewshot_config:\n",
 
101
  " aggregation: mean\n",
102
  " higher_is_better: true\n",
103
  "\"\"\"\n",
104
+ "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n"
105
+ ]
 
 
 
 
 
 
106
  },
107
  {
108
  "cell_type": "markdown",
 
 
 
109
  "metadata": {
110
  "id": "mJjo_A5tP-Td"
111
+ },
112
+ "source": [
113
+ "Llama Models"
114
+ ]
115
  },
116
  {
117
  "cell_type": "code",
118
+ "execution_count": null,
 
 
 
 
 
 
 
 
 
 
119
  "metadata": {
120
  "id": "IzP5nyP0Gwk8"
121
  },
122
+ "outputs": [],
123
+ "source": [
124
+ "!accelerate launch -m lm_eval \\\n",
125
+ "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
126
+ "--tasks mmlu_all \\\n",
127
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
128
+ "--batch_size 16\n",
129
+ "#--limit 10 \\"
130
+ ]
131
  },
132
  {
133
  "cell_type": "code",
134
+ "execution_count": null,
 
 
 
 
 
 
 
 
 
 
135
  "metadata": {
136
  "id": "oIACOAhDW5ow"
137
  },
138
+ "outputs": [],
139
+ "source": [
140
+ "!accelerate launch -m lm_eval \\\n",
141
+ "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
142
+ "--tasks mmlu_all \\\n",
143
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
144
+ "--batch_size 16\n",
145
+ "#--limit 10 \\"
146
+ ]
147
  },
148
  {
149
  "cell_type": "code",
150
+ "execution_count": null,
151
+ "metadata": {
152
+ "id": "cFFYPzBIYGf7"
153
+ },
154
+ "outputs": [],
155
  "source": [
156
+ "!accelerate launch -m lm_eval \\\n",
157
+ "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
158
+ "--tasks mmlu_all \\\n",
159
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
160
+ "--batch_size 16\n",
161
+ "#--limit 10 \\"
162
+ ]
163
+ },
164
+ {
165
+ "cell_type": "markdown",
166
  "metadata": {
167
+ "id": "1fEX-49hQ-Be"
168
  },
169
+ "source": [
170
+ "Mistral Models"
171
+ ]
172
  },
173
  {
174
  "cell_type": "code",
175
  "source": [
176
+ "!accelerate launch -m lm_eval \\\n",
177
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
178
+ "--tasks mmlu_all \\\n",
179
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
180
+ "--batch_size 16\n",
181
+ "#--limit 10 \\"
 
 
 
182
  ],
183
  "metadata": {
184
+ "id": "ilu9_ulWTy3p"
185
  },
186
  "execution_count": null,
187
  "outputs": []
188
  },
189
  {
190
+ "cell_type": "code",
191
+ "execution_count": null,
192
+ "metadata": {
193
+ "id": "3cHI2qxN2fJ0"
194
+ },
195
+ "outputs": [],
196
  "source": [
197
+ "!accelerate launch -m lm_eval \\\n",
198
+ "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
199
+ "--tasks mmlu_all \\\n",
200
+ "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
201
+ "--batch_size 16\n",
202
+ "#--limit 10 \\"
203
+ ]
204
+ },
205
+ {
206
+ "cell_type": "markdown",
207
  "metadata": {
208
+ "id": "ZUTPHnV0kMB1"
209
+ },
210
+ "source": [
211
+ "Save output results"
212
+ ]
213
  },
214
  {
215
  "cell_type": "code",
216
  "source": [
217
+ "hf_upload_folder(BASE_FOLDER)"
 
 
 
 
 
 
 
 
218
  ],
219
  "metadata": {
220
+ "id": "mGGdqBNBzFYL"
221
  },
222
  "execution_count": null,
223
  "outputs": []
224
+ }
225
+ ],
226
+ "metadata": {
227
+ "accelerator": "GPU",
228
+ "colab": {
229
+ "gpuType": "T4",
230
+ "provenance": [],
231
+ "machine_shape": "hm"
232
  },
233
+ "kernelspec": {
234
+ "display_name": "Python 3",
235
+ "name": "python3"
236
+ },
237
+ "language_info": {
238
+ "name": "python"
239
  }
240
+ },
241
+ "nbformat": 4,
242
+ "nbformat_minor": 0
243
  }