Initial setup

In [None]:
!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt

In [None]:
from datetime import datetime
import os
from huggingface_hub import login, upload_folder
from google.colab import userdata
import shutil

# --- Authentication -------------------------------------------------------
# The token comes from Colab's secret store so it never appears in the notebook.
HF_TOKEN = userdata.get('HF_TOKEN')
# Keyword argument instead of a bare positional `True`, so the call states
# what the flag means and does not depend on the parameter order of `login`.
login(token=HF_TOKEN, add_to_git_credential=True)

# --- Locations ------------------------------------------------------------
BASE_DATASET = 'pub'
REPO_ID = 'flunardelli/llm-metaeval'
# Fixed (non-timestamped) working directory: every run of the notebook reuses it.
BASE_FOLDER = f"/content/{BASE_DATASET}/"
OUTPUT_FOLDER = os.path.join(BASE_FOLDER, 'output')
TASK_FOLDER = os.path.join(BASE_FOLDER, 'tasks')

# exist_ok=True keeps this cell re-runnable: because BASE_FOLDER is a fixed
# path, a second execution would otherwise stop with FileExistsError.
# Files left over from an earlier run are kept; remove BASE_FOLDER by hand
# if a clean slate is needed.
os.makedirs(OUTPUT_FOLDER, exist_ok=True)
os.makedirs(TASK_FOLDER, exist_ok=True)

# Exported so the `!` shell cells further down can read them as environment variables.
os.environ['HF_TOKEN'] = HF_TOKEN
os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER
os.environ['TASK_FOLDER'] = TASK_FOLDER

def hf_upload_folder(folder_path):
    """Upload a local folder to the ``evals/`` path of the dataset repo.

    Args:
        folder_path: Local directory to upload.

    The destination repository is ``REPO_ID`` (repo type ``dataset``) and the
    request is authenticated with ``HF_TOKEN``. Nothing is returned.
    """
    upload_options = {
        "folder_path": folder_path,
        "path_in_repo": "evals/",
        "repo_id": REPO_ID,
        "token": HF_TOKEN,
        "repo_type": "dataset",
    }
    upload_folder(**upload_options)

def create_task(content, filename):
    """Write a task definition file into ``TASK_FOLDER``.

    Args:
        content: Text to write to the file.
        filename: Name of the file, joined onto ``TASK_FOLDER``.

    A file that already exists under the same name is overwritten.
    """
    filename_path = os.path.join(TASK_FOLDER, filename)
    # Explicit encoding so the written bytes do not depend on the runtime's locale.
    with open(filename_path, "w", encoding="utf-8") as f:
        f.write(content)

Create tasks for all PUB datasets

In [None]:
# (dataset configuration name, number of option placeholders) pairs.
# The name is substituted into the task file as `dataset_name`; the count
# decides how many `{{options[i]}}` entries are rendered into the prompt.
YAML_template_pub_tasks = list({
    "task_1": 2,
    "task_2": 5,
    "task_3": 5,
    "task_4": 3,
    "task_5": 2,
    "task_6": 2,
    "task_7": 2,
    "task_8": 2,
    "task_9": 2,
    "task_10": 3,
    "task_11": 3,
    "task_12": 2,
    "task_13": 2,
    "task_14": 4,
}.items())

# NOTE(review): not referenced by any cell visible here; left untouched in case
# another cell relies on it.
default_doc_to_text = "{{pretext.strip()}}\n {{options[0]}}\n{{options[1]}}\\n{{options[2]}}\\n{{options[3]}}\\n{{options[4]}}\\nAnswer:"


# Template for one task file passed to lm_eval via --include_path. The
# __task_name__, __dataset_name__ and __options__ placeholders are filled in
# per dataset by the loop below.
#
# The "\\n" sequences in doc_to_text are deliberate: they write a literal
# backslash-n into the YAML file, which a YAML double-quoted scalar decodes as
# a newline. A plain "\n" here would instead put a raw line break inside the
# quoted scalar, and YAML folds such breaks into single spaces, collapsing the
# whole prompt onto one line.
YAML_template_pub_base = """
task: __task_name__
dataset_path: flunardelli/PUB
dataset_name: __dataset_name__
description: "PUB"
test_split: test
fewshot_split: test
fewshot_config:
  sampler: first_n
num_fewshot: 5
output_type: multiple_choice
doc_to_text: "{{pretext.strip()}}\\nOptions:\\n__options__\\nAnswer:"
doc_to_choice: "{{options}}"
doc_to_target: "correct answer"
metric_list:
  - metric: acc
    aggregation: mean
    higher_is_better: true
"""

# Names of every generated task, in the order of YAML_template_pub_tasks.
tasks = []
for dataset_name, num_choices in YAML_template_pub_tasks:
    task_name = f"pub_{dataset_name}"
    # One `{{options[k]}}` placeholder per option, separated by the YAML newline
    # escape (see the template comment). A generator expression keeps the index
    # local, so no notebook-level `i` is created; the shell cells use `$i` as
    # their own loop variable.
    template_choices = "\\n".join("{{options[%d]}}" % k for k in range(num_choices))
    template = (
        YAML_template_pub_base
        .replace('__options__', template_choices)
        .replace('__dataset_name__', dataset_name)
        .replace('__task_name__', task_name)
    )
    create_task(template, f"{task_name}.yaml")
    tasks.append(task_name)

# Comma-separated task list, read back as $TASKS by the shell cells.
os.environ['TASKS'] = ','.join(tasks)

Llama Models

In [None]:
# Evaluate Llama-3.2-1B-Instruct (pinned revision) on every task in $TASKS, one lm_eval run per task.
# --output_path is spelled out in full instead of relying on the `--output` prefix abbreviation.
# All output of the loop goes to run.log, which `&>` truncates before writing.
!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \
--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \
--tasks $i \
--include_path $TASK_FOLDER/. --output_path $OUTPUT_FOLDER --log_samples \
--batch_size auto; done &> run.log

In [None]:
# Push everything under BASE_FOLDER (task files and evaluation output so far) to the dataset repo.
hf_upload_folder(BASE_FOLDER)

In [None]:
# Evaluate Llama-3.2-3B-Instruct (pinned revision) on every task in $TASKS, one lm_eval run per task.
# --output_path is spelled out in full instead of relying on the `--output` prefix abbreviation.
# All output of the loop goes to run.log, which `&>` truncates before writing.
!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \
--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=False \
--tasks $i \
--include_path $TASK_FOLDER/. --output_path $OUTPUT_FOLDER --log_samples \
--batch_size auto; done &> run.log

In [None]:
# Upload the contents of BASE_FOLDER again so the results of the run above reach the dataset repo.
hf_upload_folder(BASE_FOLDER)

In [None]:
# Evaluate Meta-Llama-3-8B (pinned revision) on every task in $TASKS, one lm_eval run per task.
# --output_path is spelled out in full instead of relying on the `--output` prefix abbreviation.
# All output of the loop goes to run.log, which `&>` truncates before writing.
!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \
--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=False \
--tasks $i \
--include_path $TASK_FOLDER/. --output_path $OUTPUT_FOLDER --log_samples \
--batch_size auto; done &> run.log

In [None]:
# Upload the contents of BASE_FOLDER again so the results of the run above reach the dataset repo.
hf_upload_folder(BASE_FOLDER)

Mistral Models

In [None]:
# Evaluate Mixtral-8x7B-Instruct-v0.1 (pinned revision) on every task in $TASKS, one lm_eval run per task.
# --output_path is spelled out in full instead of relying on the `--output` prefix abbreviation.
# All output of the loop goes to run.log, which `&>` truncates before writing.
!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \
--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \
--tasks $i \
--include_path $TASK_FOLDER/. --output_path $OUTPUT_FOLDER --log_samples \
--batch_size auto; done &> run.log

In [None]:
# Upload the contents of BASE_FOLDER again so the results of the run above reach the dataset repo.
hf_upload_folder(BASE_FOLDER)

In [None]:
# Evaluate Mixtral-8x22B-v0.1 (pinned revision) on every task in $TASKS, one lm_eval run per task.
# --output_path is spelled out in full instead of relying on the `--output` prefix abbreviation.
# All output of the loop goes to run.log, which `&>` truncates before writing.
!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \
--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \
--tasks $i \
--include_path $TASK_FOLDER/. --output_path $OUTPUT_FOLDER --log_samples \
--batch_size auto; done &> run.log

Save output results

In [None]:
# Final upload: send the full contents of BASE_FOLDER (task files and all evaluation output) to the dataset repo.
hf_upload_folder(BASE_FOLDER)