{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "U8RTc2PmnX-v" }, "source": [ "Initial setup" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "kGW7vfRkrqHe" }, "outputs": [], "source": [ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2I850FIsCVNw" }, "outputs": [], "source": [ "from datetime import datetime\n", "import os\n", "from huggingface_hub import login, upload_folder\n", "from google.colab import userdata\n", "import shutil\n", "\n", "HF_TOKEN = userdata.get('HUGGING_FACE_WRITE_TOKEN')\n", "login(HF_TOKEN, True)\n", "BASE_DATASET='mmlu'\n", "REPO_ID='flunardelli/llm-metaeval'\n", "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n", "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n", "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n", "#shutil.rmtree(BASE_FOLDER)\n", "os.makedirs(OUTPUT_FOLDER)\n", "os.makedirs(TASK_FOLDER)\n", "os.environ['HF_TOKEN'] = HF_TOKEN\n", "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n", "os.environ['TASK_FOLDER'] = TASK_FOLDER\n", "\n", "def hf_upload_folder(folder_path):\n", " upload_folder(\n", " folder_path=folder_path,\n", " path_in_repo=\"evals/\",\n", " repo_id=REPO_ID,\n", " token=HF_TOKEN,\n", " repo_type=\"dataset\"\n", " )\n", "\n", "def create_task(content, filename):\n", " filename_path = os.path.join(TASK_FOLDER,filename)\n", " with open(filename_path, \"w\") as f:\n", " f.write(content)" ] }, { "cell_type": "markdown", "metadata": { "id": "Jd2JwKZaPkNS" }, "source": [ "Create task for MMLU all datasets" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "xP0cC_sHih7C" }, "outputs": [], "source": [ "YAML_mmlu_en_us_string = \"\"\"\n", "task: mmlu_all\n", "dataset_path: cais/mmlu\n", "dataset_name: all\n", "description: \"MMLU dataset\"\n", "test_split: test\n", "fewshot_split: dev\n", 
"fewshot_config:\n", " sampler: first_n\n", "output_type: multiple_choice\n", "doc_to_text: \"{{question.strip()}}\\nA. {{choices[0]}}\\nB. {{choices[1]}}\\nC. {{choices[2]}}\\nD. {{choices[3]}}\\nAnswer:\"\n", "doc_to_choice: [\"A\", \"B\", \"C\", \"D\"]\n", "doc_to_target: answer\n", "metric_list:\n", " - metric: acc\n", " aggregation: mean\n", " higher_is_better: true\n", " - metric: acc_norm\n", " aggregation: mean\n", " higher_is_better: true\n", "\"\"\"\n", "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n" ] }, { "cell_type": "markdown", "metadata": { "id": "mJjo_A5tP-Td" }, "source": [ "Llama Models" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IzP5nyP0Gwk8" }, "outputs": [], "source": [ "!accelerate launch -m lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n", "--tasks mmlu_all \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n", "--batch_size 16\n", "#--limit 10 \\" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "oIACOAhDW5ow" }, "outputs": [], "source": [ "!accelerate launch -m lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n", "--tasks mmlu_all \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n", "--batch_size 16\n", "#--limit 10 \\" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "cFFYPzBIYGf7" }, "outputs": [], "source": [ "!accelerate launch -m lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n", "--tasks mmlu_all \\\n", "--include_path $TASK_FOLDER/. 
--output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n", "--batch_size 16\n", "#--limit 10 \\" ] }, { "cell_type": "markdown", "metadata": { "id": "1fEX-49hQ-Be" }, "source": [ "Mistral Models" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "ilu9_ulWTy3p" }, "outputs": [], "source": [ "!accelerate launch -m lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n", "--tasks mmlu_all \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n", "--batch_size 16\n", "#--limit 10 \\" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "3cHI2qxN2fJ0" }, "outputs": [], "source": [ "!accelerate launch -m lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n", "--tasks mmlu_all \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n", "--batch_size 16\n", "#--limit 10 \\" ] }, { "cell_type": "markdown", "metadata": { "id": "ZUTPHnV0kMB1" }, "source": [ "Save output results" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "mGGdqBNBzFYL" }, "outputs": [], "source": [ "hf_upload_folder(BASE_FOLDER)" ] } ], "metadata": { "accelerator": "GPU", "colab": { "gpuType": "T4", "provenance": [], "machine_shape": "hm" }, "kernelspec": { "display_name": "Python 3", "name": "python3" }, "language_info": { "name": "python" } }, "nbformat": 4, "nbformat_minor": 0 }