Commit 69291a3 by alexmarques (1 parent: caf66fe)
Update README.md
README.md CHANGED

@@ -191,7 +191,7 @@ This version of the lm-evaluation-harness includes versions of ARC-Challenge and
 </td>
 </tr>
 <tr>
-  <td>TruthfulQA (0-shot)
+  <td>TruthfulQA (0-shot, mc2)
 </td>
 <td>54.04
 </td>
@@ -243,6 +243,7 @@ lm_eval \
   --model vllm \
   --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
   --tasks gsm8k_cot_llama_3.1_instruct \
+  --fewshot_as_multiturn \
   --apply_chat_template \
   --num_fewshot 8 \
   --batch_size auto
@@ -268,12 +269,12 @@ lm_eval \
   --batch_size auto
 ```

-####
+#### TruthfulQA
 ```
 lm_eval \
   --model vllm \
   --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
-  --tasks
+  --tasks truthfulqa \
   --num_fewshot 0 \
   --batch_size auto
 ```
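For reference, here is the GSM8K evaluation command as it reads after this change, assembled from the second hunk above (only the leading comment and indentation are added; the flags are taken verbatim from the diff):

```
# GSM8K, chain-of-thought Llama-3.1 prompt, 8-shot, vLLM backend
lm_eval \
  --model vllm \
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
  --tasks gsm8k_cot_llama_3.1_instruct \
  --fewshot_as_multiturn \
  --apply_chat_template \
  --num_fewshot 8 \
  --batch_size auto
```

The added --fewshot_as_multiturn flag makes lm-evaluation-harness present the few-shot examples as separate chat turns, which matches how --apply_chat_template formats the prompt.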
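Likewise, the TruthfulQA command after this change, assembled from the third hunk (the previously empty heading and --tasks value are now filled in; the comment is added here for context):

```
# TruthfulQA, 0-shot, vLLM backend
lm_eval \
  --model vllm \
  --model_args pretrained="neuralmagic/Meta-Llama-3.1-8B-Instruct-quantized.w8a16",dtype=auto,add_bos_token=True,max_model_len=4096,tensor_parallel_size=1 \
  --tasks truthfulqa \
  --num_fewshot 0 \
  --batch_size auto
```

The first hunk also relabels the reported 54.04 TruthfulQA score as the mc2 (multiple-choice) metric.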