Commit
·
da59d46
1
Parent(s):
f140b0f
Bug fix: right padding, and wrong model for final answer
Browse files- 02-autotrain.ipynb +86 -11
- 04-poe-eval.ipynb +160 -158
- prompt-order-experiment.cfg +0 -0
- requirements.txt +6 -1
02-autotrain.ipynb
CHANGED
@@ -18,9 +18,13 @@
|
|
18 |
},
|
19 |
{
|
20 |
"cell_type": "code",
|
21 |
-
"execution_count":
|
22 |
"id": "52543575-f92e-4038-ad13-30967f47eb7a",
|
23 |
-
"metadata": {
|
|
|
|
|
|
|
|
|
24 |
"outputs": [],
|
25 |
"source": [
|
26 |
"import os\n",
|
@@ -37,6 +41,32 @@
|
|
37 |
"## Config"
|
38 |
]
|
39 |
},
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
40 |
{
|
41 |
"cell_type": "markdown",
|
42 |
"id": "97c25070-775a-4fb1-9694-4579250686a6",
|
@@ -58,7 +88,7 @@
|
|
58 |
},
|
59 |
{
|
60 |
"cell_type": "code",
|
61 |
-
"execution_count":
|
62 |
"id": "dc2a8514-51c1-404b-8cfa-6637cc810668",
|
63 |
"metadata": {},
|
64 |
"outputs": [],
|
@@ -80,15 +110,15 @@
|
|
80 |
" },\n",
|
81 |
" },\n",
|
82 |
" \"params\": {\n",
|
83 |
-
" \"block_size\":
|
84 |
-
" \"model_max_length\":
|
85 |
" \"epochs\": 2,\n",
|
86 |
" \"batch_size\": 1,\n",
|
87 |
" \"lr\": 3e-5,\n",
|
88 |
" \"peft\": True,\n",
|
89 |
" \"quantization\": \"int4\",\n",
|
90 |
" \"target_modules\": \"all-linear\",\n",
|
91 |
-
" \"padding\": \"
|
92 |
" \"optimizer\": \"adamw_torch\",\n",
|
93 |
" \"scheduler\": \"linear\",\n",
|
94 |
" \"gradient_accumulation\": 8,\n",
|
@@ -96,7 +126,7 @@
|
|
96 |
" },\n",
|
97 |
" \"hub\": {\n",
|
98 |
" \"username\": \"derek-thomas\",\n",
|
99 |
-
" \"token\":
|
100 |
" \"push_to_hub\": True,\n",
|
101 |
" },\n",
|
102 |
"}"
|
@@ -113,7 +143,7 @@
|
|
113 |
},
|
114 |
{
|
115 |
"cell_type": "code",
|
116 |
-
"execution_count":
|
117 |
"id": "957eb2b7-feec-422f-ba46-b293d9a77c1b",
|
118 |
"metadata": {},
|
119 |
"outputs": [],
|
@@ -133,7 +163,7 @@
|
|
133 |
},
|
134 |
{
|
135 |
"cell_type": "code",
|
136 |
-
"execution_count":
|
137 |
"id": "b86702bf-f494-4951-863e-be5b8462fbd1",
|
138 |
"metadata": {},
|
139 |
"outputs": [],
|
@@ -152,10 +182,47 @@
|
|
152 |
},
|
153 |
{
|
154 |
"cell_type": "code",
|
155 |
-
"execution_count":
|
156 |
"id": "025ccd2f-de54-4ac2-9f36-f606876dcd3c",
|
157 |
"metadata": {},
|
158 |
-
"outputs": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
159 |
"source": [
|
160 |
"# Generate configs and run commands\n",
|
161 |
"for project_suffix, text_column in zip(project_suffixes, text_columns):\n",
|
@@ -173,6 +240,14 @@
|
|
173 |
" print(f\"Running autotrain with config: {config_path}\")\n",
|
174 |
" subprocess.run([\"autotrain\", \"--config\", config_path])"
|
175 |
]
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
176 |
}
|
177 |
],
|
178 |
"metadata": {
|
|
|
18 |
},
|
19 |
{
|
20 |
"cell_type": "code",
|
21 |
+
"execution_count": 1,
|
22 |
"id": "52543575-f92e-4038-ad13-30967f47eb7a",
|
23 |
+
"metadata": {
|
24 |
+
"jupyter": {
|
25 |
+
"is_executing": true
|
26 |
+
}
|
27 |
+
},
|
28 |
"outputs": [],
|
29 |
"source": [
|
30 |
"import os\n",
|
|
|
41 |
"## Config"
|
42 |
]
|
43 |
},
|
44 |
+
{
|
45 |
+
"cell_type": "code",
|
46 |
+
"execution_count": 3,
|
47 |
+
"id": "6992324b-173c-4335-b557-cf78fbb2dd93",
|
48 |
+
"metadata": {},
|
49 |
+
"outputs": [
|
50 |
+
{
|
51 |
+
"data": {
|
52 |
+
"application/vnd.jupyter.widget-view+json": {
|
53 |
+
"model_id": "24ea5bd118ed4632a6ad859c4c976e66",
|
54 |
+
"version_major": 2,
|
55 |
+
"version_minor": 0
|
56 |
+
},
|
57 |
+
"text/plain": [
|
58 |
+
"VBox(children=(HTML(value='<center> <img\\nsrc=https://huggingface.co/front/assets/huggingface_logo-noborder.sv…"
|
59 |
+
]
|
60 |
+
},
|
61 |
+
"metadata": {},
|
62 |
+
"output_type": "display_data"
|
63 |
+
}
|
64 |
+
],
|
65 |
+
"source": [
|
66 |
+
"from huggingface_hub import login, get_token\n",
|
67 |
+
"login()"
|
68 |
+
]
|
69 |
+
},
|
70 |
{
|
71 |
"cell_type": "markdown",
|
72 |
"id": "97c25070-775a-4fb1-9694-4579250686a6",
|
|
|
88 |
},
|
89 |
{
|
90 |
"cell_type": "code",
|
91 |
+
"execution_count": 4,
|
92 |
"id": "dc2a8514-51c1-404b-8cfa-6637cc810668",
|
93 |
"metadata": {},
|
94 |
"outputs": [],
|
|
|
110 |
" },\n",
|
111 |
" },\n",
|
112 |
" \"params\": {\n",
|
113 |
+
" \"block_size\": 512,\n",
|
114 |
+
" \"model_max_length\": 1500,\n",
|
115 |
" \"epochs\": 2,\n",
|
116 |
" \"batch_size\": 1,\n",
|
117 |
" \"lr\": 3e-5,\n",
|
118 |
" \"peft\": True,\n",
|
119 |
" \"quantization\": \"int4\",\n",
|
120 |
" \"target_modules\": \"all-linear\",\n",
|
121 |
+
" \"padding\": \"right\",\n",
|
122 |
" \"optimizer\": \"adamw_torch\",\n",
|
123 |
" \"scheduler\": \"linear\",\n",
|
124 |
" \"gradient_accumulation\": 8,\n",
|
|
|
126 |
" },\n",
|
127 |
" \"hub\": {\n",
|
128 |
" \"username\": \"derek-thomas\",\n",
|
129 |
+
" \"token\": get_token(),\n",
|
130 |
" \"push_to_hub\": True,\n",
|
131 |
" },\n",
|
132 |
"}"
|
|
|
143 |
},
|
144 |
{
|
145 |
"cell_type": "code",
|
146 |
+
"execution_count": 5,
|
147 |
"id": "957eb2b7-feec-422f-ba46-b293d9a77c1b",
|
148 |
"metadata": {},
|
149 |
"outputs": [],
|
|
|
163 |
},
|
164 |
{
|
165 |
"cell_type": "code",
|
166 |
+
"execution_count": 6,
|
167 |
"id": "b86702bf-f494-4951-863e-be5b8462fbd1",
|
168 |
"metadata": {},
|
169 |
"outputs": [],
|
|
|
182 |
},
|
183 |
{
|
184 |
"cell_type": "code",
|
185 |
+
"execution_count": 9,
|
186 |
"id": "025ccd2f-de54-4ac2-9f36-f606876dcd3c",
|
187 |
"metadata": {},
|
188 |
+
"outputs": [
|
189 |
+
{
|
190 |
+
"name": "stdout",
|
191 |
+
"output_type": "stream",
|
192 |
+
"text": [
|
193 |
+
"Running autotrain with config: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
|
194 |
+
"INFO | 2024-12-12 20:45:45 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_gpt3_5.yml\n",
|
195 |
+
"INFO | 2024-12-12 20:45:45 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
|
196 |
+
"INFO | 2024-12-12 20:45:45 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
|
197 |
+
"INFO | 2024-12-12 20:45:45 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-RFA-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
198 |
+
"INFO | 2024-12-12 20:45:52 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-RFA-gpt3-5\n",
|
199 |
+
"Running autotrain with config: ./autotrain_configs/conversation_RFA_mistral.yml\n",
|
200 |
+
"INFO | 2024-12-12 20:45:56 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_RFA_mistral.yml\n",
|
201 |
+
"INFO | 2024-12-12 20:45:56 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
|
202 |
+
"INFO | 2024-12-12 20:45:56 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
|
203 |
+
"INFO | 2024-12-12 20:45:56 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-RFA-mistral', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_RFA_mistral', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
204 |
+
"INFO | 2024-12-12 20:46:01 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-RFA-mistral\n",
|
205 |
+
"Running autotrain with config: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
|
206 |
+
"INFO | 2024-12-12 20:46:05 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_gpt3_5.yml\n",
|
207 |
+
"INFO | 2024-12-12 20:46:05 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
|
208 |
+
"INFO | 2024-12-12 20:46:05 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
|
209 |
+
"INFO | 2024-12-12 20:46:05 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FAR-gpt3-5', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_gpt3_5', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
210 |
+
"INFO | 2024-12-12 20:46:12 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FAR-gpt3-5\n",
|
211 |
+
"Running autotrain with config: ./autotrain_configs/conversation_FAR_mistral.yml\n",
|
212 |
+
"INFO | 2024-12-12 20:46:16 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FAR_mistral.yml\n",
|
213 |
+
"INFO | 2024-12-12 20:46:16 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
|
214 |
+
"INFO | 2024-12-12 20:46:16 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
|
215 |
+
"INFO | 2024-12-12 20:46:16 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FAR-mistral', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FAR_mistral', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
216 |
+
"INFO | 2024-12-12 20:46:22 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FAR-mistral\n",
|
217 |
+
"Running autotrain with config: ./autotrain_configs/conversation_FA.yml\n",
|
218 |
+
"INFO | 2024-12-12 20:46:25 | autotrain.cli.autotrain:main:60 - Using AutoTrain configuration: ./autotrain_configs/conversation_FA.yml\n",
|
219 |
+
"INFO | 2024-12-12 20:46:25 | autotrain.parser:__post_init__:170 - Running task: lm_training\n",
|
220 |
+
"INFO | 2024-12-12 20:46:25 | autotrain.parser:__post_init__:171 - Using backend: spaces-l4x1\n",
|
221 |
+
"INFO | 2024-12-12 20:46:25 | autotrain.parser:run:234 - {'model': 'mistralai/Mistral-7B-Instruct-v0.3', 'project_name': 'mistral-v03-poe-FA', 'data_path': 'derek-thomas/labeled-multiple-choice-explained-mistral-tokenized', 'train_split': 'train', 'valid_split': None, 'add_eos_token': True, 'block_size': 512, 'model_max_length': 1500, 'padding': 'right', 'trainer': 'sft', 'use_flash_attention_2': False, 'log': 'tensorboard', 'disable_gradient_checkpointing': False, 'logging_steps': -1, 'eval_strategy': 'epoch', 'save_total_limit': 1, 'auto_find_batch_size': False, 'mixed_precision': 'bf16', 'lr': 3e-05, 'epochs': 2, 'batch_size': 1, 'warmup_ratio': 0.1, 'gradient_accumulation': 8, 'optimizer': 'adamw_torch', 'scheduler': 'linear', 'weight_decay': 0.0, 'max_grad_norm': 1.0, 'seed': 42, 'chat_template': 'none', 'quantization': 'int4', 'target_modules': 'all-linear', 'merge_adapter': False, 'peft': True, 'lora_r': 16, 'lora_alpha': 32, 'lora_dropout': 0.05, 'model_ref': None, 'dpo_beta': 0.1, 'max_prompt_length': 128, 'max_completion_length': None, 'prompt_text_column': None, 'text_column': 'conversation_FA', 'rejected_text_column': None, 'push_to_hub': True, 'username': 'derek-thomas', 'token': '*****', 'unsloth': False, 'distributed_backend': None}\n",
|
222 |
+
"INFO | 2024-12-12 20:46:31 | autotrain.parser:run:239 - Job ID: derek-thomas/autotrain-mistral-v03-poe-FA\n"
|
223 |
+
]
|
224 |
+
}
|
225 |
+
],
|
226 |
"source": [
|
227 |
"# Generate configs and run commands\n",
|
228 |
"for project_suffix, text_column in zip(project_suffixes, text_columns):\n",
|
|
|
240 |
" print(f\"Running autotrain with config: {config_path}\")\n",
|
241 |
" subprocess.run([\"autotrain\", \"--config\", config_path])"
|
242 |
]
|
243 |
+
},
|
244 |
+
{
|
245 |
+
"cell_type": "code",
|
246 |
+
"execution_count": null,
|
247 |
+
"id": "67675837-2a38-4427-9186-32a25a970ff3",
|
248 |
+
"metadata": {},
|
249 |
+
"outputs": [],
|
250 |
+
"source": []
|
251 |
}
|
252 |
],
|
253 |
"metadata": {
|
04-poe-eval.ipynb
CHANGED
@@ -104,7 +104,7 @@
|
|
104 |
{
|
105 |
"data": {
|
106 |
"text/plain": [
|
107 |
-
"'derek-thomas/mistral-v03-poe-RFA-mistral,derek-thomas/mistral-v03-poe-FAR-mistral,derek-thomas/mistral-v03-poe-RFA-gpt3-5,derek-thomas/mistral-v03-poe-FAR-gpt3-5,derek-thomas/mistral-v03-poe-
|
108 |
]
|
109 |
},
|
110 |
"execution_count": 3,
|
@@ -137,7 +137,7 @@
|
|
137 |
" },\n",
|
138 |
" 'FA': {\n",
|
139 |
" 'pydantic': FAModel,\n",
|
140 |
-
" \"lora\": \"derek-thomas/mistral-v03-poe-
|
141 |
" \"column\": 'user_prompt_FA',\n",
|
142 |
" },\n",
|
143 |
" 'base': {\n",
|
@@ -162,7 +162,7 @@
|
|
162 |
{
|
163 |
"data": {
|
164 |
"application/vnd.jupyter.widget-view+json": {
|
165 |
-
"model_id": "
|
166 |
"version_major": 2,
|
167 |
"version_minor": 0
|
168 |
},
|
@@ -239,10 +239,9 @@
|
|
239 |
"\n",
|
240 |
"def get_my_endpoint():\n",
|
241 |
" name = f\"prompt-order-experiment\"\n",
|
242 |
-
" namespace='
|
243 |
" try:\n",
|
244 |
" endpoint = get_inference_endpoint(name, namespace=namespace)\n",
|
245 |
-
" endpoint.wait()\n",
|
246 |
" except:\n",
|
247 |
" # Custom Docker image details\n",
|
248 |
" custom_image = {\n",
|
@@ -279,7 +278,6 @@
|
|
279 |
" custom_image=custom_image,\n",
|
280 |
" secrets=secrets\n",
|
281 |
" )\n",
|
282 |
-
" # endpoint.wait()\n",
|
283 |
" \n",
|
284 |
" print(\"Your model is ready to use!\")\n",
|
285 |
" endpoint.wait()\n",
|
@@ -297,8 +295,8 @@
|
|
297 |
"output_type": "stream",
|
298 |
"text": [
|
299 |
"Your model is ready to use!\n",
|
300 |
-
"CPU times: user
|
301 |
-
"Wall time:
|
302 |
]
|
303 |
}
|
304 |
],
|
@@ -359,7 +357,7 @@
|
|
359 |
{
|
360 |
"data": {
|
361 |
"text/plain": [
|
362 |
-
"'{\"Reasoning\": \"Busses are primarily used for transporting
|
363 |
]
|
364 |
},
|
365 |
"execution_count": 10,
|
@@ -388,7 +386,7 @@
|
|
388 |
{
|
389 |
"data": {
|
390 |
"text/plain": [
|
391 |
-
"'{\"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas
|
392 |
]
|
393 |
},
|
394 |
"execution_count": 11,
|
@@ -400,7 +398,7 @@
|
|
400 |
"key = 'RFA-gpt3-5'\n",
|
401 |
"response = endpoint.client.text_generation(\n",
|
402 |
" prompt=user_prompt_RFA,\n",
|
403 |
-
" max_new_tokens=
|
404 |
" adapter_id=experiments[key]['lora'],\n",
|
405 |
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
406 |
")\n",
|
@@ -450,7 +448,7 @@
|
|
450 |
{
|
451 |
"data": {
|
452 |
"text/plain": [
|
453 |
-
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas, to facilitate
|
454 |
]
|
455 |
},
|
456 |
"execution_count": 13,
|
@@ -479,7 +477,7 @@
|
|
479 |
{
|
480 |
"data": {
|
481 |
"text/plain": [
|
482 |
-
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans.
|
483 |
]
|
484 |
},
|
485 |
"execution_count": 14,
|
@@ -541,7 +539,7 @@
|
|
541 |
{
|
542 |
"data": {
|
543 |
"text/plain": [
|
544 |
-
"
|
545 |
]
|
546 |
},
|
547 |
"execution_count": 16,
|
@@ -656,7 +654,7 @@
|
|
656 |
{
|
657 |
"data": {
|
658 |
"application/vnd.jupyter.widget-view+json": {
|
659 |
-
"model_id": "
|
660 |
"version_major": 2,
|
661 |
"version_minor": 0
|
662 |
},
|
@@ -765,7 +763,7 @@
|
|
765 |
{
|
766 |
"data": {
|
767 |
"text/plain": [
|
768 |
-
"InferenceEndpoint(name='prompt-order-experiment', namespace='
|
769 |
]
|
770 |
},
|
771 |
"execution_count": 19,
|
@@ -859,7 +857,7 @@
|
|
859 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
860 |
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
861 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
862 |
-
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
863 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
864 |
" </tr>\n",
|
865 |
" <tr>\n",
|
@@ -880,9 +878,9 @@
|
|
880 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
881 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
882 |
" <td>{\"Reasoning\": \"Global warming is primarily cau...</td>\n",
|
883 |
-
" <td>{\"Final Answer\": \"
|
884 |
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
885 |
-
" <td>{\"Final Answer\": \"a\"
|
886 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
887 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
888 |
" </tr>\n",
|
@@ -904,10 +902,10 @@
|
|
904 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
905 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
906 |
" <td>{\"Reasoning\": \"The question asks for an organi...</td>\n",
|
907 |
-
" <td>{\"Final Answer\": \"
|
908 |
-
" <td>{\"Reasoning\": \"The
|
909 |
-
" <td>{\"Final Answer\": \"
|
910 |
-
" <td>{\"Final Answer\": \"
|
911 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
912 |
" </tr>\n",
|
913 |
" <tr>\n",
|
@@ -927,11 +925,11 @@
|
|
927 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
928 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
929 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
930 |
-
" <td>{\"Reasoning\": \"
|
931 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
932 |
-
" <td>{
|
933 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
934 |
-
" <td>{\"Final Answer\": \"
|
935 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
936 |
" </tr>\n",
|
937 |
" <tr>\n",
|
@@ -951,11 +949,11 @@
|
|
951 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
952 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
953 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
954 |
-
" <td>{\"Reasoning\": \"The question asks
|
955 |
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"Plants and...</td>\n",
|
956 |
-
" <td>{\"Reasoning\": \"The question asks about the
|
957 |
-
" <td>{\"Final Answer\": \"
|
958 |
-
" <td>{\"Final Answer\": \"
|
959 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
960 |
" </tr>\n",
|
961 |
" <tr>\n",
|
@@ -1002,7 +1000,7 @@
|
|
1002 |
" <td>{\"Reasoning\": \"The question asks for a way to ...</td>\n",
|
1003 |
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ...</td>\n",
|
1004 |
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1005 |
-
" <td>{\"Final Answer\": \"g\"
|
1006 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1007 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1008 |
" </tr>\n",
|
@@ -1025,9 +1023,9 @@
|
|
1025 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1026 |
" <td>{\"Reasoning\": \"The question asks for a term th...</td>\n",
|
1027 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"A drought ...</td>\n",
|
1028 |
-
" <td>{
|
1029 |
-
" <td>{\"Final Answer\": \"d\"
|
1030 |
-
" <td>{\"Final Answer\": \"
|
1031 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1032 |
" </tr>\n",
|
1033 |
" <tr>\n",
|
@@ -1051,7 +1049,7 @@
|
|
1051 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1052 |
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1053 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1054 |
-
" <td>{\"Final Answer\": \"
|
1055 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1056 |
" </tr>\n",
|
1057 |
" <tr>\n",
|
@@ -1074,7 +1072,7 @@
|
|
1074 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1075 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Ultraviole...</td>\n",
|
1076 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1077 |
-
" <td>{\"Final Answer\": \"
|
1078 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1079 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1080 |
" </tr>\n",
|
@@ -1095,11 +1093,11 @@
|
|
1095 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1096 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1097 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1098 |
-
" <td>{\"Reasoning\": \"The
|
1099 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1100 |
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1101 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1102 |
-
" <td>{\"Final Answer\": \"
|
1103 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1104 |
" </tr>\n",
|
1105 |
" </tbody>\n",
|
@@ -1268,19 +1266,19 @@
|
|
1268 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1269 |
"1 {\"Reasoning\": \"Global warming is primarily cau... \n",
|
1270 |
"2 {\"Reasoning\": \"The question asks for an organi... \n",
|
1271 |
-
"3 {\"Reasoning\": \"
|
1272 |
-
"4 {\"Reasoning\": \"The question asks
|
1273 |
"... ... \n",
|
1274 |
"1678 {\"Reasoning\": \"The question asks for a way to ... \n",
|
1275 |
"1679 {\"Reasoning\": \"The question asks for a term th... \n",
|
1276 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1277 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1278 |
-
"1682 {\"Reasoning\": \"The
|
1279 |
"\n",
|
1280 |
" responses_FAR_mistral \\\n",
|
1281 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1282 |
-
"1 {\"Final Answer\": \"
|
1283 |
-
"2 {\"Final Answer\": \"
|
1284 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1285 |
"4 {\"Final Answer\": \"a\", \"Reasoning\": \"Plants and... \n",
|
1286 |
"... ... \n",
|
@@ -1293,41 +1291,41 @@
|
|
1293 |
" responses_RFA_gpt3_5 \\\n",
|
1294 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1295 |
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1296 |
-
"2 {\"Reasoning\": \"The
|
1297 |
-
"3 {
|
1298 |
-
"4 {\"Reasoning\": \"The question asks about the
|
1299 |
"... ... \n",
|
1300 |
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1301 |
-
"1679 {
|
1302 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1303 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1304 |
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1305 |
"\n",
|
1306 |
" responses_FAR_gpt3_5 \\\n",
|
1307 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1308 |
-
"1 {\"Final Answer\": \"a\"
|
1309 |
-
"2 {\"Final Answer\": \"
|
1310 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1311 |
-
"4 {\"Final Answer\": \"
|
1312 |
"... ... \n",
|
1313 |
-
"1678 {\"Final Answer\": \"g\"
|
1314 |
-
"1679 {\"Final Answer\": \"d\"
|
1315 |
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1316 |
-
"1681 {\"Final Answer\": \"
|
1317 |
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1318 |
"\n",
|
1319 |
-
"
|
1320 |
-
"0 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} \n",
|
1321 |
-
"1
|
1322 |
-
"2
|
1323 |
-
"3
|
1324 |
-
"4
|
1325 |
-
"...
|
1326 |
-
"1678
|
1327 |
-
"1679
|
1328 |
-
"1680
|
1329 |
-
"1681
|
1330 |
-
"1682
|
1331 |
"\n",
|
1332 |
"[1683 rows x 21 columns]"
|
1333 |
]
|
@@ -1343,13 +1341,17 @@
|
|
1343 |
},
|
1344 |
{
|
1345 |
"cell_type": "code",
|
1346 |
-
"execution_count":
|
1347 |
"id": "8619f9f5-9fe4-433e-b524-51c2b12e8d12",
|
1348 |
"metadata": {},
|
1349 |
"outputs": [],
|
1350 |
"source": [
|
1351 |
"def extract_final_answer(response):\n",
|
1352 |
-
"
|
|
|
|
|
|
|
|
|
1353 |
"\n",
|
1354 |
"# Create new columns for predictions\n",
|
1355 |
"df['predictions_base'] = df['responses_base'].apply(extract_final_answer)\n",
|
@@ -1362,7 +1364,7 @@
|
|
1362 |
},
|
1363 |
{
|
1364 |
"cell_type": "code",
|
1365 |
-
"execution_count":
|
1366 |
"id": "938cf2a3-2fed-42a3-82ec-a56cb0ea9f37",
|
1367 |
"metadata": {},
|
1368 |
"outputs": [
|
@@ -1370,12 +1372,12 @@
|
|
1370 |
"name": "stdout",
|
1371 |
"output_type": "stream",
|
1372 |
"text": [
|
1373 |
-
"Base: \t\t\t\t\t\t45.
|
1374 |
-
"Final Answer: \t\t\t\t\
|
1375 |
-
"Reasoning and then the Final Answer (Mistral): \
|
1376 |
-
"Final Answer and then the Reasoning (Mistral): \
|
1377 |
-
"Reasoning and then the Final Answer (GPT-3.5): \
|
1378 |
-
"Final Answer and then the Reasoning (GPT-3.5): \
|
1379 |
]
|
1380 |
}
|
1381 |
],
|
@@ -1392,7 +1394,7 @@
|
|
1392 |
},
|
1393 |
{
|
1394 |
"cell_type": "code",
|
1395 |
-
"execution_count":
|
1396 |
"id": "83aae472-513b-43c3-9ee8-64d4cda775e0",
|
1397 |
"metadata": {},
|
1398 |
"outputs": [
|
@@ -1434,10 +1436,10 @@
|
|
1434 |
" <th>responses_base</th>\n",
|
1435 |
" <th>predictions_base</th>\n",
|
1436 |
" <th>predictions_FA</th>\n",
|
1437 |
-
" <th>predictions_RFA_mistral</th>\n",
|
1438 |
" <th>predictions_FAR_mistral</th>\n",
|
1439 |
" <th>predictions_RFA_gpt3_5</th>\n",
|
1440 |
" <th>predictions_FAR_gpt3_5</th>\n",
|
|
|
1441 |
" </tr>\n",
|
1442 |
" </thead>\n",
|
1443 |
" <tbody>\n",
|
@@ -1456,7 +1458,7 @@
|
|
1456 |
" <td>...</td>\n",
|
1457 |
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
1458 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
1459 |
-
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1460 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1461 |
" <td>b</td>\n",
|
1462 |
" <td>b</td>\n",
|
@@ -1479,7 +1481,7 @@
|
|
1479 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1480 |
" <td>...</td>\n",
|
1481 |
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
1482 |
-
" <td>{\"Final Answer\": \"a\"
|
1483 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1484 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1485 |
" <td>a</td>\n",
|
@@ -1487,7 +1489,7 @@
|
|
1487 |
" <td>g</td>\n",
|
1488 |
" <td>a</td>\n",
|
1489 |
" <td>a</td>\n",
|
1490 |
-
" <td>
|
1491 |
" </tr>\n",
|
1492 |
" <tr>\n",
|
1493 |
" <th>2</th>\n",
|
@@ -1502,13 +1504,13 @@
|
|
1502 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1503 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1504 |
" <td>...</td>\n",
|
1505 |
-
" <td>{\"Reasoning\": \"The
|
1506 |
-
" <td>{\"Final Answer\": \"
|
1507 |
-
" <td>{\"Final Answer\": \"
|
1508 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1509 |
" <td>c</td>\n",
|
1510 |
-
" <td>
|
1511 |
-
" <td>
|
1512 |
" <td>c</td>\n",
|
1513 |
" <td>c</td>\n",
|
1514 |
" <td>e</td>\n",
|
@@ -1526,16 +1528,16 @@
|
|
1526 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1527 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1528 |
" <td>...</td>\n",
|
1529 |
-
" <td>{
|
1530 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
|
|
1531 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1532 |
-
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1533 |
-
" <td>d</td>\n",
|
1534 |
-
" <td>d</td>\n",
|
1535 |
" <td>d</td>\n",
|
|
|
1536 |
" <td>d</td>\n",
|
1537 |
" <td>d</td>\n",
|
1538 |
" <td>d</td>\n",
|
|
|
1539 |
" </tr>\n",
|
1540 |
" <tr>\n",
|
1541 |
" <th>4</th>\n",
|
@@ -1550,16 +1552,16 @@
|
|
1550 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1551 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1552 |
" <td>...</td>\n",
|
1553 |
-
" <td>{\"Reasoning\": \"The question asks about the
|
1554 |
-
" <td>{\"Final Answer\": \"
|
1555 |
-
" <td>{\"Final Answer\": \"
|
1556 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1557 |
" <td>g</td>\n",
|
1558 |
-
" <td>
|
1559 |
-
" <td>b</td>\n",
|
1560 |
" <td>a</td>\n",
|
1561 |
" <td>f</td>\n",
|
1562 |
-
" <td>
|
|
|
1563 |
" </tr>\n",
|
1564 |
" <tr>\n",
|
1565 |
" <th>...</th>\n",
|
@@ -1599,7 +1601,7 @@
|
|
1599 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1600 |
" <td>...</td>\n",
|
1601 |
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1602 |
-
" <td>{\"Final Answer\": \"g\"
|
1603 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1604 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1605 |
" <td>g</td>\n",
|
@@ -1622,15 +1624,15 @@
|
|
1622 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1623 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1624 |
" <td>...</td>\n",
|
1625 |
-
" <td>{
|
1626 |
-
" <td>{\"Final Answer\": \"d\"
|
1627 |
-
" <td>{\"Final Answer\": \"
|
1628 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1629 |
" <td>d</td>\n",
|
|
|
1630 |
" <td>d</td>\n",
|
1631 |
" <td>d</td>\n",
|
1632 |
" <td>d</td>\n",
|
1633 |
-
" <td>f</td>\n",
|
1634 |
" <td>d</td>\n",
|
1635 |
" </tr>\n",
|
1636 |
" <tr>\n",
|
@@ -1648,14 +1650,14 @@
|
|
1648 |
" <td>...</td>\n",
|
1649 |
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1650 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
|
|
1651 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1652 |
-
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1653 |
-
" <td>d</td>\n",
|
1654 |
" <td>d</td>\n",
|
1655 |
" <td>e</td>\n",
|
1656 |
" <td>e</td>\n",
|
1657 |
" <td>e</td>\n",
|
1658 |
" <td>e</td>\n",
|
|
|
1659 |
" </tr>\n",
|
1660 |
" <tr>\n",
|
1661 |
" <th>1681</th>\n",
|
@@ -1671,7 +1673,7 @@
|
|
1671 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1672 |
" <td>...</td>\n",
|
1673 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1674 |
-
" <td>{\"Final Answer\": \"
|
1675 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1676 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1677 |
" <td>b</td>\n",
|
@@ -1679,7 +1681,7 @@
|
|
1679 |
" <td>b</td>\n",
|
1680 |
" <td>b</td>\n",
|
1681 |
" <td>b</td>\n",
|
1682 |
-
" <td>
|
1683 |
" </tr>\n",
|
1684 |
" <tr>\n",
|
1685 |
" <th>1682</th>\n",
|
@@ -1696,13 +1698,13 @@
|
|
1696 |
" <td>...</td>\n",
|
1697 |
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1698 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1699 |
-
" <td>{\"Final Answer\": \"
|
1700 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1701 |
" <td>c</td>\n",
|
1702 |
-
" <td>c</td>\n",
|
1703 |
" <td>f</td>\n",
|
1704 |
" <td>c</td>\n",
|
1705 |
-
" <td>
|
|
|
1706 |
" <td>c</td>\n",
|
1707 |
" </tr>\n",
|
1708 |
" </tbody>\n",
|
@@ -1818,72 +1820,72 @@
|
|
1818 |
" responses_RFA_gpt3_5 \\\n",
|
1819 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1820 |
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1821 |
-
"2 {\"Reasoning\": \"The
|
1822 |
-
"3 {
|
1823 |
-
"4 {\"Reasoning\": \"The question asks about the
|
1824 |
"... ... \n",
|
1825 |
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1826 |
-
"1679 {
|
1827 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1828 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1829 |
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1830 |
"\n",
|
1831 |
" responses_FAR_gpt3_5 \\\n",
|
1832 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1833 |
-
"1 {\"Final Answer\": \"a\"
|
1834 |
-
"2 {\"Final Answer\": \"
|
1835 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1836 |
-
"4 {\"Final Answer\": \"
|
1837 |
"... ... \n",
|
1838 |
-
"1678 {\"Final Answer\": \"g\"
|
1839 |
-
"1679 {\"Final Answer\": \"d\"
|
1840 |
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1841 |
-
"1681 {\"Final Answer\": \"
|
1842 |
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1843 |
"\n",
|
1844 |
-
"
|
1845 |
-
"0 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} b \n",
|
1846 |
-
"1
|
1847 |
-
"2
|
1848 |
-
"3
|
1849 |
-
"4
|
1850 |
-
"...
|
1851 |
-
"1678
|
1852 |
-
"1679
|
1853 |
-
"1680
|
1854 |
-
"1681
|
1855 |
-
"1682
|
1856 |
"\n",
|
1857 |
-
" predictions_FA
|
1858 |
-
"0 b b
|
1859 |
-
"1 a g
|
1860 |
-
"2
|
1861 |
-
"3
|
1862 |
-
"4
|
1863 |
-
"... ... ...
|
1864 |
-
"1678 g g
|
1865 |
-
"1679
|
1866 |
-
"1680
|
1867 |
-
"1681 b b
|
1868 |
-
"1682
|
1869 |
"\n",
|
1870 |
-
"
|
1871 |
-
"0 b
|
1872 |
-
"1 a
|
1873 |
-
"2 c
|
1874 |
-
"3 d
|
1875 |
-
"4
|
1876 |
-
"... ...
|
1877 |
-
"1678 g
|
1878 |
-
"1679
|
1879 |
-
"1680 e
|
1880 |
-
"1681 b
|
1881 |
-
"1682
|
1882 |
"\n",
|
1883 |
"[1683 rows x 27 columns]"
|
1884 |
]
|
1885 |
},
|
1886 |
-
"execution_count":
|
1887 |
"metadata": {},
|
1888 |
"output_type": "execute_result"
|
1889 |
}
|
@@ -1894,14 +1896,14 @@
|
|
1894 |
},
|
1895 |
{
|
1896 |
"cell_type": "code",
|
1897 |
-
"execution_count":
|
1898 |
"id": "45c08dd4-0b98-4e0f-b487-549f60518a4e",
|
1899 |
"metadata": {},
|
1900 |
"outputs": [
|
1901 |
{
|
1902 |
"data": {
|
1903 |
"application/vnd.jupyter.widget-view+json": {
|
1904 |
-
"model_id": "
|
1905 |
"version_major": 2,
|
1906 |
"version_minor": 0
|
1907 |
},
|
@@ -1915,7 +1917,7 @@
|
|
1915 |
{
|
1916 |
"data": {
|
1917 |
"application/vnd.jupyter.widget-view+json": {
|
1918 |
-
"model_id": "
|
1919 |
"version_major": 2,
|
1920 |
"version_minor": 0
|
1921 |
},
|
@@ -1929,10 +1931,10 @@
|
|
1929 |
{
|
1930 |
"data": {
|
1931 |
"text/plain": [
|
1932 |
-
"CommitInfo(commit_url='https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-results/commit/
|
1933 |
]
|
1934 |
},
|
1935 |
-
"execution_count":
|
1936 |
"metadata": {},
|
1937 |
"output_type": "execute_result"
|
1938 |
}
|
|
|
104 |
{
|
105 |
"data": {
|
106 |
"text/plain": [
|
107 |
+
"'derek-thomas/mistral-v03-poe-RFA-mistral,derek-thomas/mistral-v03-poe-FAR-mistral,derek-thomas/mistral-v03-poe-RFA-gpt3-5,derek-thomas/mistral-v03-poe-FAR-gpt3-5,derek-thomas/mistral-v03-poe-FA'"
|
108 |
]
|
109 |
},
|
110 |
"execution_count": 3,
|
|
|
137 |
" },\n",
|
138 |
" 'FA': {\n",
|
139 |
" 'pydantic': FAModel,\n",
|
140 |
+
" \"lora\": \"derek-thomas/mistral-v03-poe-FA\",\n",
|
141 |
" \"column\": 'user_prompt_FA',\n",
|
142 |
" },\n",
|
143 |
" 'base': {\n",
|
|
|
162 |
{
|
163 |
"data": {
|
164 |
"application/vnd.jupyter.widget-view+json": {
|
165 |
+
"model_id": "50dbecc676db4dc78dd1974d2f1a87dc",
|
166 |
"version_major": 2,
|
167 |
"version_minor": 0
|
168 |
},
|
|
|
239 |
"\n",
|
240 |
"def get_my_endpoint():\n",
|
241 |
" name = f\"prompt-order-experiment\"\n",
|
242 |
+
" namespace='derek-thomas'\n",
|
243 |
" try:\n",
|
244 |
" endpoint = get_inference_endpoint(name, namespace=namespace)\n",
|
|
|
245 |
" except:\n",
|
246 |
" # Custom Docker image details\n",
|
247 |
" custom_image = {\n",
|
|
|
278 |
" custom_image=custom_image,\n",
|
279 |
" secrets=secrets\n",
|
280 |
" )\n",
|
|
|
281 |
" \n",
|
282 |
" print(\"Your model is ready to use!\")\n",
|
283 |
" endpoint.wait()\n",
|
|
|
295 |
"output_type": "stream",
|
296 |
"text": [
|
297 |
"Your model is ready to use!\n",
|
298 |
+
"CPU times: user 21.1 ms, sys: 10 ms, total: 31.1 ms\n",
|
299 |
+
"Wall time: 1.72 s\n"
|
300 |
]
|
301 |
}
|
302 |
],
|
|
|
357 |
{
|
358 |
"data": {
|
359 |
"text/plain": [
|
360 |
+
"'{\"Reasoning\": \"Busses are primarily used for transporting people, so the correct answer is (b) Transporting humans. The other options are not related to the function of a bus.\", \"Final Answer\": \"b\"}'"
|
361 |
]
|
362 |
},
|
363 |
"execution_count": 10,
|
|
|
386 |
{
|
387 |
"data": {
|
388 |
"text/plain": [
|
389 |
+
"'{\"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas where public transportation is necessary. They provide a means of transportation for a large number of people at once, reducing traffic congestion and carbon emissions. Therefore, the correct answer is (b) transporting humans.\", \"Final Answer\": \"b\"}'"
|
390 |
]
|
391 |
},
|
392 |
"execution_count": 11,
|
|
|
398 |
"key = 'RFA-gpt3-5'\n",
|
399 |
"response = endpoint.client.text_generation(\n",
|
400 |
" prompt=user_prompt_RFA,\n",
|
401 |
+
" max_new_tokens=OUTPUT_TOKENS,\n",
|
402 |
" adapter_id=experiments[key]['lora'],\n",
|
403 |
" grammar={\"type\": \"json\", \"value\": experiments[key]['pydantic'].schema()},\n",
|
404 |
")\n",
|
|
|
448 |
{
|
449 |
"data": {
|
450 |
"text/plain": [
|
451 |
+
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas, to facilitate commuting and travel. They are not used for protective shelter, helping other species benefit, transporting airplanes, serving as a backbone, communication, safe operation, or safe driving.\"}'"
|
452 |
]
|
453 |
},
|
454 |
"execution_count": 13,
|
|
|
477 |
{
|
478 |
"data": {
|
479 |
"text/plain": [
|
480 |
+
"'{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are primarily used for transporting humans, especially in urban areas, to facilitate their travel from one place to another. The other options are incorrect because busses do not provide protective shelter, do not help other species benefit, are not used to transport airplanes, do not serve as a backbone, are not used for communication, and are not related to safe operation or driving.\"}'"
|
481 |
]
|
482 |
},
|
483 |
"execution_count": 14,
|
|
|
539 |
{
|
540 |
"data": {
|
541 |
"text/plain": [
|
542 |
+
"\"{'Final Answer': 'b'}\""
|
543 |
]
|
544 |
},
|
545 |
"execution_count": 16,
|
|
|
654 |
{
|
655 |
"data": {
|
656 |
"application/vnd.jupyter.widget-view+json": {
|
657 |
+
"model_id": "be0bd3e278ae4d90a161918772ee71e8",
|
658 |
"version_major": 2,
|
659 |
"version_minor": 0
|
660 |
},
|
|
|
763 |
{
|
764 |
"data": {
|
765 |
"text/plain": [
|
766 |
+
"InferenceEndpoint(name='prompt-order-experiment', namespace='derek-thomas', repository='mistralai/Mistral-7B-Instruct-v0.3', status='paused', url=None)"
|
767 |
]
|
768 |
},
|
769 |
"execution_count": 19,
|
|
|
857 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
858 |
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
859 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
860 |
+
" <td>{ \"Final Answer\": \"b\" }</td>\n",
|
861 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
862 |
" </tr>\n",
|
863 |
" <tr>\n",
|
|
|
878 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
879 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
880 |
" <td>{\"Reasoning\": \"Global warming is primarily cau...</td>\n",
|
881 |
+
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Riding a b...</td>\n",
|
882 |
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
883 |
+
" <td>{ \"Final Answer\": \"a\" ,\"Reasoning\": \"The nucle...</td>\n",
|
884 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
885 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
886 |
" </tr>\n",
|
|
|
902 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
903 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
904 |
" <td>{\"Reasoning\": \"The question asks for an organi...</td>\n",
|
905 |
+
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Cameras ar...</td>\n",
|
906 |
+
" <td>{\"Reasoning\": \"The correct answer is (c) Cells...</td>\n",
|
907 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Cells are ...</td>\n",
|
908 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
909 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
910 |
" </tr>\n",
|
911 |
" <tr>\n",
|
|
|
925 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
926 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
927 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
928 |
+
" <td>{\"Reasoning\": \"The question asks about what is...</td>\n",
|
929 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
930 |
+
" <td>{\"Reasoning\": \"Bacteria are microorganisms tha...</td>\n",
|
931 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
932 |
+
" <td>{\"Final Answer\": \"e\"}</td>\n",
|
933 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
934 |
" </tr>\n",
|
935 |
" <tr>\n",
|
|
|
949 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
950 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
951 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
952 |
+
" <td>{\"Reasoning\": \"The question asks for the livin...</td>\n",
|
953 |
" <td>{\"Final Answer\": \"a\", \"Reasoning\": \"Plants and...</td>\n",
|
954 |
+
" <td>{\"Reasoning\": \"The question asks about the liv...</td>\n",
|
955 |
+
" <td>{ \"Final Answer\": \"a\" ,\"Reasoning\": \"Plants an...</td>\n",
|
956 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
957 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
958 |
" </tr>\n",
|
959 |
" <tr>\n",
|
|
|
1000 |
" <td>{\"Reasoning\": \"The question asks for a way to ...</td>\n",
|
1001 |
" <td>{\"Final Answer\": \"g\", \"Reasoning\": \"Recycling ...</td>\n",
|
1002 |
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1003 |
+
" <td>{ \"Final Answer\": \"g\" ,\"Reasoning\": \"Recycling...</td>\n",
|
1004 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1005 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1006 |
" </tr>\n",
|
|
|
1023 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1024 |
" <td>{\"Reasoning\": \"The question asks for a term th...</td>\n",
|
1025 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"A drought ...</td>\n",
|
1026 |
+
" <td>{\"Reasoning\": \"A drought is a prolonged period...</td>\n",
|
1027 |
+
" <td>{ \"Final Answer\": \"d\" ,\"Reasoning\": \"A drought...</td>\n",
|
1028 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1029 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1030 |
" </tr>\n",
|
1031 |
" <tr>\n",
|
|
|
1049 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1050 |
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1051 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1052 |
+
" <td>{\"Final Answer\": \"e\"}</td>\n",
|
1053 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1054 |
" </tr>\n",
|
1055 |
" <tr>\n",
|
|
|
1072 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1073 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Ultraviole...</td>\n",
|
1074 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1075 |
+
" <td>{ \"Final Answer\": \"b\" ,\"Reasoning\": \"Ultraviol...</td>\n",
|
1076 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1077 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1078 |
" </tr>\n",
|
|
|
1093 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1094 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1095 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1096 |
+
" <td>{\"Reasoning\": \"The correct answer is 'Exercise...</td>\n",
|
1097 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1098 |
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1099 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1100 |
+
" <td>{\"Final Answer\": \"f\"}</td>\n",
|
1101 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1102 |
" </tr>\n",
|
1103 |
" </tbody>\n",
|
|
|
1266 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1267 |
"1 {\"Reasoning\": \"Global warming is primarily cau... \n",
|
1268 |
"2 {\"Reasoning\": \"The question asks for an organi... \n",
|
1269 |
+
"3 {\"Reasoning\": \"The question asks about what is... \n",
|
1270 |
+
"4 {\"Reasoning\": \"The question asks for the livin... \n",
|
1271 |
"... ... \n",
|
1272 |
"1678 {\"Reasoning\": \"The question asks for a way to ... \n",
|
1273 |
"1679 {\"Reasoning\": \"The question asks for a term th... \n",
|
1274 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1275 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1276 |
+
"1682 {\"Reasoning\": \"The correct answer is 'Exercise... \n",
|
1277 |
"\n",
|
1278 |
" responses_FAR_mistral \\\n",
|
1279 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1280 |
+
"1 {\"Final Answer\": \"g\", \"Reasoning\": \"Riding a b... \n",
|
1281 |
+
"2 {\"Final Answer\": \"b\", \"Reasoning\": \"Cameras ar... \n",
|
1282 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1283 |
"4 {\"Final Answer\": \"a\", \"Reasoning\": \"Plants and... \n",
|
1284 |
"... ... \n",
|
|
|
1291 |
" responses_RFA_gpt3_5 \\\n",
|
1292 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1293 |
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1294 |
+
"2 {\"Reasoning\": \"The correct answer is (c) Cells... \n",
|
1295 |
+
"3 {\"Reasoning\": \"Bacteria are microorganisms tha... \n",
|
1296 |
+
"4 {\"Reasoning\": \"The question asks about the liv... \n",
|
1297 |
"... ... \n",
|
1298 |
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1299 |
+
"1679 {\"Reasoning\": \"A drought is a prolonged period... \n",
|
1300 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1301 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1302 |
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1303 |
"\n",
|
1304 |
" responses_FAR_gpt3_5 \\\n",
|
1305 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1306 |
+
"1 { \"Final Answer\": \"a\" ,\"Reasoning\": \"The nucle... \n",
|
1307 |
+
"2 {\"Final Answer\": \"c\", \"Reasoning\": \"Cells are ... \n",
|
1308 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1309 |
+
"4 { \"Final Answer\": \"a\" ,\"Reasoning\": \"Plants an... \n",
|
1310 |
"... ... \n",
|
1311 |
+
"1678 { \"Final Answer\": \"g\" ,\"Reasoning\": \"Recycling... \n",
|
1312 |
+
"1679 { \"Final Answer\": \"d\" ,\"Reasoning\": \"A drought... \n",
|
1313 |
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1314 |
+
"1681 { \"Final Answer\": \"b\" ,\"Reasoning\": \"Ultraviol... \n",
|
1315 |
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1316 |
"\n",
|
1317 |
+
" responses_FA responses_base \n",
|
1318 |
+
"0 { \"Final Answer\": \"b\" } {\"Final Answer\": \"b\"} \n",
|
1319 |
+
"1 {\"Final Answer\": \"a\"} {\"Final Answer\": \"a\"} \n",
|
1320 |
+
"2 {\"Final Answer\": \"b\"} {\"Final Answer\": \"c\"} \n",
|
1321 |
+
"3 {\"Final Answer\": \"e\"} {\"Final Answer\": \"d\"} \n",
|
1322 |
+
"4 {\"Final Answer\": \"a\"} {\"Final Answer\": \"g\"} \n",
|
1323 |
+
"... ... ... \n",
|
1324 |
+
"1678 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} \n",
|
1325 |
+
"1679 {\"Final Answer\": \"a\"} {\"Final Answer\": \"d\"} \n",
|
1326 |
+
"1680 {\"Final Answer\": \"e\"} {\"Final Answer\": \"d\"} \n",
|
1327 |
+
"1681 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} \n",
|
1328 |
+
"1682 {\"Final Answer\": \"f\"} {\"Final Answer\": \"c\"} \n",
|
1329 |
"\n",
|
1330 |
"[1683 rows x 21 columns]"
|
1331 |
]
|
|
|
1341 |
},
|
1342 |
{
|
1343 |
"cell_type": "code",
|
1344 |
+
"execution_count": 26,
|
1345 |
"id": "8619f9f5-9fe4-433e-b524-51c2b12e8d12",
|
1346 |
"metadata": {},
|
1347 |
"outputs": [],
|
1348 |
"source": [
|
1349 |
"def extract_final_answer(response):\n",
|
1350 |
+
" try:\n",
|
1351 |
+
" answer = json.loads(response).get(\"Final Answer\")\n",
|
1352 |
+
" except:\n",
|
1353 |
+
" answer = 'x'\n",
|
1354 |
+
" return answer\n",
|
1355 |
"\n",
|
1356 |
"# Create new columns for predictions\n",
|
1357 |
"df['predictions_base'] = df['responses_base'].apply(extract_final_answer)\n",
|
|
|
1364 |
},
|
1365 |
{
|
1366 |
"cell_type": "code",
|
1367 |
+
"execution_count": 28,
|
1368 |
"id": "938cf2a3-2fed-42a3-82ec-a56cb0ea9f37",
|
1369 |
"metadata": {},
|
1370 |
"outputs": [
|
|
|
1372 |
"name": "stdout",
|
1373 |
"output_type": "stream",
|
1374 |
"text": [
|
1375 |
+
"Base: \t\t\t\t\t\t45.22%\n",
|
1376 |
+
"Final Answer: \t\t\t\t\t64.53%\n",
|
1377 |
+
"Reasoning and then the Final Answer (Mistral): \t55.02%\n",
|
1378 |
+
"Final Answer and then the Reasoning (Mistral): \t61.79%\n",
|
1379 |
+
"Reasoning and then the Final Answer (GPT-3.5): \t57.28%\n",
|
1380 |
+
"Final Answer and then the Reasoning (GPT-3.5): \t61.62%\n"
|
1381 |
]
|
1382 |
}
|
1383 |
],
|
|
|
1394 |
},
|
1395 |
{
|
1396 |
"cell_type": "code",
|
1397 |
+
"execution_count": 29,
|
1398 |
"id": "83aae472-513b-43c3-9ee8-64d4cda775e0",
|
1399 |
"metadata": {},
|
1400 |
"outputs": [
|
|
|
1436 |
" <th>responses_base</th>\n",
|
1437 |
" <th>predictions_base</th>\n",
|
1438 |
" <th>predictions_FA</th>\n",
|
|
|
1439 |
" <th>predictions_FAR_mistral</th>\n",
|
1440 |
" <th>predictions_RFA_gpt3_5</th>\n",
|
1441 |
" <th>predictions_FAR_gpt3_5</th>\n",
|
1442 |
+
" <th>predictions_RFA_mistral</th>\n",
|
1443 |
" </tr>\n",
|
1444 |
" </thead>\n",
|
1445 |
" <tbody>\n",
|
|
|
1458 |
" <td>...</td>\n",
|
1459 |
" <td>{\"Reasoning\": \"Busses are primarily used for t...</td>\n",
|
1460 |
" <td>{\"Final Answer\": \"b\", \"Reasoning\": \"Busses are...</td>\n",
|
1461 |
+
" <td>{ \"Final Answer\": \"b\" }</td>\n",
|
1462 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1463 |
" <td>b</td>\n",
|
1464 |
" <td>b</td>\n",
|
|
|
1481 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1482 |
" <td>...</td>\n",
|
1483 |
" <td>{\"Reasoning\": \"The nucleus of a cell (option a...</td>\n",
|
1484 |
+
" <td>{ \"Final Answer\": \"a\" ,\"Reasoning\": \"The nucle...</td>\n",
|
1485 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1486 |
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1487 |
" <td>a</td>\n",
|
|
|
1489 |
" <td>g</td>\n",
|
1490 |
" <td>a</td>\n",
|
1491 |
" <td>a</td>\n",
|
1492 |
+
" <td>g</td>\n",
|
1493 |
" </tr>\n",
|
1494 |
" <tr>\n",
|
1495 |
" <th>2</th>\n",
|
|
|
1504 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1505 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1506 |
" <td>...</td>\n",
|
1507 |
+
" <td>{\"Reasoning\": \"The correct answer is (c) Cells...</td>\n",
|
1508 |
+
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Cells are ...</td>\n",
|
1509 |
+
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1510 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1511 |
" <td>c</td>\n",
|
1512 |
+
" <td>b</td>\n",
|
1513 |
+
" <td>b</td>\n",
|
1514 |
" <td>c</td>\n",
|
1515 |
" <td>c</td>\n",
|
1516 |
" <td>e</td>\n",
|
|
|
1528 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1529 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1530 |
" <td>...</td>\n",
|
1531 |
+
" <td>{\"Reasoning\": \"Bacteria are microorganisms tha...</td>\n",
|
1532 |
" <td>{\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c...</td>\n",
|
1533 |
+
" <td>{\"Final Answer\": \"e\"}</td>\n",
|
1534 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
|
|
|
|
|
|
1535 |
" <td>d</td>\n",
|
1536 |
+
" <td>e</td>\n",
|
1537 |
" <td>d</td>\n",
|
1538 |
" <td>d</td>\n",
|
1539 |
" <td>d</td>\n",
|
1540 |
+
" <td>e</td>\n",
|
1541 |
" </tr>\n",
|
1542 |
" <tr>\n",
|
1543 |
" <th>4</th>\n",
|
|
|
1552 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1553 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1554 |
" <td>...</td>\n",
|
1555 |
+
" <td>{\"Reasoning\": \"The question asks about the liv...</td>\n",
|
1556 |
+
" <td>{ \"Final Answer\": \"a\" ,\"Reasoning\": \"Plants an...</td>\n",
|
1557 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1558 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1559 |
" <td>g</td>\n",
|
1560 |
+
" <td>a</td>\n",
|
|
|
1561 |
" <td>a</td>\n",
|
1562 |
" <td>f</td>\n",
|
1563 |
+
" <td>a</td>\n",
|
1564 |
+
" <td>b</td>\n",
|
1565 |
" </tr>\n",
|
1566 |
" <tr>\n",
|
1567 |
" <th>...</th>\n",
|
|
|
1601 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1602 |
" <td>...</td>\n",
|
1603 |
" <td>{\"Reasoning\": \"Mining, fossil fuels, deforesta...</td>\n",
|
1604 |
+
" <td>{ \"Final Answer\": \"g\" ,\"Reasoning\": \"Recycling...</td>\n",
|
1605 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1606 |
" <td>{\"Final Answer\": \"g\"}</td>\n",
|
1607 |
" <td>g</td>\n",
|
|
|
1624 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1625 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1626 |
" <td>...</td>\n",
|
1627 |
+
" <td>{\"Reasoning\": \"A drought is a prolonged period...</td>\n",
|
1628 |
+
" <td>{ \"Final Answer\": \"d\" ,\"Reasoning\": \"A drought...</td>\n",
|
1629 |
+
" <td>{\"Final Answer\": \"a\"}</td>\n",
|
1630 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
1631 |
" <td>d</td>\n",
|
1632 |
+
" <td>a</td>\n",
|
1633 |
" <td>d</td>\n",
|
1634 |
" <td>d</td>\n",
|
1635 |
" <td>d</td>\n",
|
|
|
1636 |
" <td>d</td>\n",
|
1637 |
" </tr>\n",
|
1638 |
" <tr>\n",
|
|
|
1650 |
" <td>...</td>\n",
|
1651 |
" <td>{\"Reasoning\": \"Ingestion is the process of tak...</td>\n",
|
1652 |
" <td>{\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ...</td>\n",
|
1653 |
+
" <td>{\"Final Answer\": \"e\"}</td>\n",
|
1654 |
" <td>{\"Final Answer\": \"d\"}</td>\n",
|
|
|
|
|
1655 |
" <td>d</td>\n",
|
1656 |
" <td>e</td>\n",
|
1657 |
" <td>e</td>\n",
|
1658 |
" <td>e</td>\n",
|
1659 |
" <td>e</td>\n",
|
1660 |
+
" <td>c</td>\n",
|
1661 |
" </tr>\n",
|
1662 |
" <tr>\n",
|
1663 |
" <th>1681</th>\n",
|
|
|
1673 |
" <td><s>[INST] Answer the Question and include your...</td>\n",
|
1674 |
" <td>...</td>\n",
|
1675 |
" <td>{\"Reasoning\": \"Ultraviolet (UV) light is a typ...</td>\n",
|
1676 |
+
" <td>{ \"Final Answer\": \"b\" ,\"Reasoning\": \"Ultraviol...</td>\n",
|
1677 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1678 |
" <td>{\"Final Answer\": \"b\"}</td>\n",
|
1679 |
" <td>b</td>\n",
|
|
|
1681 |
" <td>b</td>\n",
|
1682 |
" <td>b</td>\n",
|
1683 |
" <td>b</td>\n",
|
1684 |
+
" <td>b</td>\n",
|
1685 |
" </tr>\n",
|
1686 |
" <tr>\n",
|
1687 |
" <th>1682</th>\n",
|
|
|
1698 |
" <td>...</td>\n",
|
1699 |
" <td>{\"Reasoning\": \"A body's strength is primarily ...</td>\n",
|
1700 |
" <td>{\"Final Answer\": \"c\", \"Reasoning\": \"Running is...</td>\n",
|
1701 |
+
" <td>{\"Final Answer\": \"f\"}</td>\n",
|
1702 |
" <td>{\"Final Answer\": \"c\"}</td>\n",
|
1703 |
" <td>c</td>\n",
|
|
|
1704 |
" <td>f</td>\n",
|
1705 |
" <td>c</td>\n",
|
1706 |
+
" <td>d</td>\n",
|
1707 |
+
" <td>c</td>\n",
|
1708 |
" <td>c</td>\n",
|
1709 |
" </tr>\n",
|
1710 |
" </tbody>\n",
|
|
|
1820 |
" responses_RFA_gpt3_5 \\\n",
|
1821 |
"0 {\"Reasoning\": \"Busses are primarily used for t... \n",
|
1822 |
"1 {\"Reasoning\": \"The nucleus of a cell (option a... \n",
|
1823 |
+
"2 {\"Reasoning\": \"The correct answer is (c) Cells... \n",
|
1824 |
+
"3 {\"Reasoning\": \"Bacteria are microorganisms tha... \n",
|
1825 |
+
"4 {\"Reasoning\": \"The question asks about the liv... \n",
|
1826 |
"... ... \n",
|
1827 |
"1678 {\"Reasoning\": \"Mining, fossil fuels, deforesta... \n",
|
1828 |
+
"1679 {\"Reasoning\": \"A drought is a prolonged period... \n",
|
1829 |
"1680 {\"Reasoning\": \"Ingestion is the process of tak... \n",
|
1830 |
"1681 {\"Reasoning\": \"Ultraviolet (UV) light is a typ... \n",
|
1831 |
"1682 {\"Reasoning\": \"A body's strength is primarily ... \n",
|
1832 |
"\n",
|
1833 |
" responses_FAR_gpt3_5 \\\n",
|
1834 |
"0 {\"Final Answer\": \"b\", \"Reasoning\": \"Busses are... \n",
|
1835 |
+
"1 { \"Final Answer\": \"a\" ,\"Reasoning\": \"The nucle... \n",
|
1836 |
+
"2 {\"Final Answer\": \"c\", \"Reasoning\": \"Cells are ... \n",
|
1837 |
"3 {\"Final Answer\": \"d\", \"Reasoning\": \"Bacteria c... \n",
|
1838 |
+
"4 { \"Final Answer\": \"a\" ,\"Reasoning\": \"Plants an... \n",
|
1839 |
"... ... \n",
|
1840 |
+
"1678 { \"Final Answer\": \"g\" ,\"Reasoning\": \"Recycling... \n",
|
1841 |
+
"1679 { \"Final Answer\": \"d\" ,\"Reasoning\": \"A drought... \n",
|
1842 |
"1680 {\"Final Answer\": \"e\", \"Reasoning\": \"Ingestion ... \n",
|
1843 |
+
"1681 { \"Final Answer\": \"b\" ,\"Reasoning\": \"Ultraviol... \n",
|
1844 |
"1682 {\"Final Answer\": \"c\", \"Reasoning\": \"Running is... \n",
|
1845 |
"\n",
|
1846 |
+
" responses_FA responses_base predictions_base \\\n",
|
1847 |
+
"0 { \"Final Answer\": \"b\" } {\"Final Answer\": \"b\"} b \n",
|
1848 |
+
"1 {\"Final Answer\": \"a\"} {\"Final Answer\": \"a\"} a \n",
|
1849 |
+
"2 {\"Final Answer\": \"b\"} {\"Final Answer\": \"c\"} c \n",
|
1850 |
+
"3 {\"Final Answer\": \"e\"} {\"Final Answer\": \"d\"} d \n",
|
1851 |
+
"4 {\"Final Answer\": \"a\"} {\"Final Answer\": \"g\"} g \n",
|
1852 |
+
"... ... ... ... \n",
|
1853 |
+
"1678 {\"Final Answer\": \"g\"} {\"Final Answer\": \"g\"} g \n",
|
1854 |
+
"1679 {\"Final Answer\": \"a\"} {\"Final Answer\": \"d\"} d \n",
|
1855 |
+
"1680 {\"Final Answer\": \"e\"} {\"Final Answer\": \"d\"} d \n",
|
1856 |
+
"1681 {\"Final Answer\": \"b\"} {\"Final Answer\": \"b\"} b \n",
|
1857 |
+
"1682 {\"Final Answer\": \"f\"} {\"Final Answer\": \"c\"} c \n",
|
1858 |
"\n",
|
1859 |
+
" predictions_FA predictions_FAR_mistral predictions_RFA_gpt3_5 \\\n",
|
1860 |
+
"0 b b b \n",
|
1861 |
+
"1 a g a \n",
|
1862 |
+
"2 b b c \n",
|
1863 |
+
"3 e d d \n",
|
1864 |
+
"4 a a f \n",
|
1865 |
+
"... ... ... ... \n",
|
1866 |
+
"1678 g g g \n",
|
1867 |
+
"1679 a d d \n",
|
1868 |
+
"1680 e e e \n",
|
1869 |
+
"1681 b b b \n",
|
1870 |
+
"1682 f c d \n",
|
1871 |
"\n",
|
1872 |
+
" predictions_FAR_gpt3_5 predictions_RFA_mistral \n",
|
1873 |
+
"0 b b \n",
|
1874 |
+
"1 a g \n",
|
1875 |
+
"2 c e \n",
|
1876 |
+
"3 d e \n",
|
1877 |
+
"4 a b \n",
|
1878 |
+
"... ... ... \n",
|
1879 |
+
"1678 g g \n",
|
1880 |
+
"1679 d d \n",
|
1881 |
+
"1680 e c \n",
|
1882 |
+
"1681 b b \n",
|
1883 |
+
"1682 c c \n",
|
1884 |
"\n",
|
1885 |
"[1683 rows x 27 columns]"
|
1886 |
]
|
1887 |
},
|
1888 |
+
"execution_count": 29,
|
1889 |
"metadata": {},
|
1890 |
"output_type": "execute_result"
|
1891 |
}
|
|
|
1896 |
},
|
1897 |
{
|
1898 |
"cell_type": "code",
|
1899 |
+
"execution_count": 30,
|
1900 |
"id": "45c08dd4-0b98-4e0f-b487-549f60518a4e",
|
1901 |
"metadata": {},
|
1902 |
"outputs": [
|
1903 |
{
|
1904 |
"data": {
|
1905 |
"application/vnd.jupyter.widget-view+json": {
|
1906 |
+
"model_id": "23d5dbd0a91d436fb9920dfe81e4803a",
|
1907 |
"version_major": 2,
|
1908 |
"version_minor": 0
|
1909 |
},
|
|
|
1917 |
{
|
1918 |
"data": {
|
1919 |
"application/vnd.jupyter.widget-view+json": {
|
1920 |
+
"model_id": "0b25bcb277574e8792b14e838a32fe25",
|
1921 |
"version_major": 2,
|
1922 |
"version_minor": 0
|
1923 |
},
|
|
|
1931 |
{
|
1932 |
"data": {
|
1933 |
"text/plain": [
|
1934 |
+
"CommitInfo(commit_url='https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-results/commit/796d0867b715f2fad05d6e54ad1e0e0504ca670c', commit_message='Upload dataset', commit_description='', oid='796d0867b715f2fad05d6e54ad1e0e0504ca670c', pr_url=None, repo_url=RepoUrl('https://huggingface.co/datasets/derek-thomas/labeled-multiple-choice-explained-mistral-results', endpoint='https://huggingface.co', repo_type='dataset', repo_id='derek-thomas/labeled-multiple-choice-explained-mistral-results'), pr_revision=None, pr_num=None)"
|
1935 |
]
|
1936 |
},
|
1937 |
+
"execution_count": 30,
|
1938 |
"metadata": {},
|
1939 |
"output_type": "execute_result"
|
1940 |
}
|
prompt-order-experiment.cfg
ADDED
File without changes
|
requirements.txt
CHANGED
@@ -8,4 +8,9 @@ scikit-learn
|
|
8 |
lighteval[tensorboardX,adapters]
|
9 |
nest_asyncio
|
10 |
plotly
|
11 |
-
ipywidgets
|
|
|
|
|
|
|
|
|
|
|
|
8 |
lighteval[tensorboardX,adapters]
|
9 |
nest_asyncio
|
10 |
plotly
|
11 |
+
ipywidgets
|
12 |
+
|
13 |
+
# Reflex
|
14 |
+
reflex
|
15 |
+
reflex-ag-grid
|
16 |
+
|