{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4", "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "Initial setup" ], "metadata": { "id": "U8RTc2PmnX-v" } }, { "cell_type": "code", "source": [ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt" ], "metadata": { "id": "kGW7vfRkrqHe" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from datetime import datetime\n", "import os\n", "from huggingface_hub import login, upload_folder\n", "from google.colab import userdata\n", "import shutil\n", "\n", "HF_TOKEN = userdata.get('HF_TOKEN')\n", "login(HF_TOKEN, True)\n", "BASE_DATASET='pub'\n", "REPO_ID='flunardelli/llm-metaeval'\n", "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n", "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n", "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n", "#shutil.rmtree(BASE_FOLDER)\n", "os.makedirs(OUTPUT_FOLDER)\n", "os.makedirs(TASK_FOLDER)\n", "os.environ['HF_TOKEN'] = HF_TOKEN\n", "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n", "os.environ['TASK_FOLDER'] = TASK_FOLDER\n", "\n", "def hf_upload_folder(folder_path):\n", " upload_folder(\n", " folder_path=folder_path,\n", " path_in_repo=\"evals/\",\n", " repo_id=REPO_ID,\n", " token=HF_TOKEN,\n", " repo_type=\"dataset\"\n", " )\n", "\n", "def create_task(content, filename):\n", " filename_path = os.path.join(TASK_FOLDER,filename)\n", " with open(filename_path, \"w\") as f:\n", " f.write(content)" ], "metadata": { "id": "IHxFvAC4eSnW" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Create task for PUB all datasets" ], "metadata": { "id": "Jd2JwKZaPkNS" } }, { "cell_type": "code", "source": [ "YAML_template_pub_tasks = [\n", " (\"task_1\", 2),\n", " (\"task_2\", 5),\n", " (\"task_3\", 5),\n", " (\"task_4\", 3),\n", " (\"task_5\", 2),\n", " (\"task_6\", 2),\n", " (\"task_7\", 2),\n", " (\"task_8\", 2),\n", " (\"task_9\", 2),\n", " (\"task_10\", 3),\n", " (\"task_11\", 3),\n", " (\"task_12\", 2),\n", " (\"task_13\", 2),\n", " (\"task_14\", 4)\n", "]\n", "\n", "default_doc_to_text = \"{{pretext.strip()}}\\n {{options[0]}}\\n{{options[1]}}\\\\n{{options[2]}}\\\\n{{options[3]}}\\\\n{{options[4]}}\\\\nAnswer:\"\n", "\n", "\n", "YAML_template_pub_base = \"\"\"\n", "task: __task_name__\n", "dataset_path: flunardelli/PUB\n", "dataset_name: __dataset_name__\n", "description: \"PUB\"\n", "test_split: test\n", "fewshot_split: test\n", "fewshot_config:\n", " sampler: first_n\n", "num_fewshot: 5\n", "output_type: multiple_choice\n", "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n", "doc_to_choice: \"{{options}}\"\n", "doc_to_target: \"correct answer\"\n", "metric_list:\n", " - metric: acc\n", " aggregation: mean\n", " higher_is_better: true\n", "\"\"\"\n", "tasks = []\n", "for t in YAML_template_pub_tasks:\n", " dataset_name, num_choices = t\n", " task_name = f\"pub_{dataset_name}\"\n", " tasks.append(task_name)\n", " templace_choices = '\\n'.join([\"{{options[__i__]}}\".replace('__i__',str(i)) for i in range(num_choices)])\n", " template = (YAML_template_pub_base\n", " .replace('__options__',templace_choices)\n", " .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n", " )\n", " create_task(template, 
f\"pub_{dataset_name}.yaml\")\n", "\n", "os.environ['TASKS'] = ','.join(tasks)" ], "metadata": { "id": "xP0cC_sHih7C" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Llama Models" ], "metadata": { "id": "mJjo_A5tP-Td" } }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "NOwy6ZlY3Mw7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "v-7drt76r9wG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "oIACOAhDW5ow" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "XowpCSOHr-qr" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "1Nxw4WNxZUyb" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "aNx_r4ZBr_ZW" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Mistral Models" ], "metadata": { "id": "1fEX-49hQ-Be" } }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "E3dBWV1V9C-O" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "NcGYz2g7sKe7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. 
--output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "LPqTo2z29RKx" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Upload evaluation results to the Hugging Face Hub" ], "metadata": { "id": "U8qh9BEbgBy7" } }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "ZQl05b1rf83u" }, "execution_count": null, "outputs": [] },
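{ "cell_type": "markdown", "source": [ "Optional: a minimal sketch that summarizes accuracy from the lm-eval output files in `OUTPUT_FOLDER`. It assumes the default lm-evaluation-harness layout of one `results_*.json` file per run containing a top-level `results` mapping; adjust the glob pattern or metric keys if your version writes a different structure." ], "metadata": {} }, { "cell_type": "code", "source": [ "# Minimal sketch: collect accuracy numbers from lm-eval output files.\n", "# Assumes one results_*.json per run with a top-level \"results\" mapping\n", "# of task name -> metrics (the default lm-evaluation-harness layout).\n", "import glob, json\n", "\n", "result_files = sorted(glob.glob(os.path.join(OUTPUT_FOLDER, '**', 'results_*.json'), recursive=True))\n", "for path in result_files:\n", "    with open(path) as f:\n", "        results = json.load(f).get('results', {})\n", "    for task, metrics in results.items():\n", "        acc = metrics.get('acc,none', metrics.get('acc'))\n", "        print(f\"{os.path.relpath(path, OUTPUT_FOLDER)} :: {task}: acc={acc}\")" ], "metadata": {}, "execution_count": null, "outputs": [] } ] }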