{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "U8RTc2PmnX-v"
   },
   "source": [
    "Initial setup"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "kGW7vfRkrqHe"
   },
   "outputs": [],
   "source": [
    "# %pip (rather than !pip) guarantees the install targets this kernel's environment.\n",
    "%pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "2I850FIsCVNw"
   },
   "outputs": [],
   "source": [
    "from datetime import datetime\n",
    "import os\n",
    "from huggingface_hub import login, upload_folder\n",
    "from google.colab import userdata\n",
    "import shutil\n",
    "\n",
    "# Authenticate against the Hugging Face Hub with the token from Colab's secret store.\n",
    "HF_TOKEN = userdata.get('HF_TOKEN')\n",
    "login(HF_TOKEN, True)\n",
    "\n",
    "BASE_DATASET='mmlu'\n",
    "REPO_ID='flunardelli/llm-metaeval'\n",
    "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
    "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
    "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
    "#shutil.rmtree(BASE_FOLDER)\n",
    "# exist_ok=True keeps this cell idempotent: re-running the notebook no longer\n",
    "# raises FileExistsError when the folders already exist.\n",
    "os.makedirs(OUTPUT_FOLDER, exist_ok=True)\n",
    "os.makedirs(TASK_FOLDER, exist_ok=True)\n",
    "# Exported so the !lm_eval shell cells below can read them as $TASKS / $TASK_FOLDER / $OUTPUT_FOLDER.\n",
    "os.environ['HF_TOKEN'] = HF_TOKEN\n",
    "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
    "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
    "\n",
    "def hf_upload_folder(folder_path):\n",
    "    \"\"\"Upload folder_path into the evals/ path of the llm-metaeval dataset repo.\"\"\"\n",
    "    upload_folder(\n",
    "        folder_path=folder_path,\n",
    "        path_in_repo=\"evals/\",\n",
    "        repo_id=REPO_ID,\n",
    "        token=HF_TOKEN,\n",
    "        repo_type=\"dataset\"\n",
    "    )\n",
    "\n",
    "def create_task(content, filename):\n",
    "    \"\"\"Write a task definition string to TASK_FOLDER/filename.\"\"\"\n",
    "    filename_path = os.path.join(TASK_FOLDER, filename)\n",
    "    with open(filename_path, \"w\") as f:\n",
    "        f.write(content)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "Jd2JwKZaPkNS"
   },
   "source": [
    "Create task for MMLU all datasets"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xP0cC_sHih7C"
   },
   "outputs": [],
   "source": [
    "# NOTE: the prompt newlines are double-escaped (\\\\n) so the written YAML file\n",
    "# contains the literal two-character sequence \\n, which YAML then decodes to a\n",
    "# newline. A raw newline inside a double-quoted YAML scalar would be folded to\n",
    "# a space and silently flatten the MMLU prompt onto one line.\n",
    "YAML_mmlu_en_us_string = \"\"\"\n",
    "task: mmlu_all\n",
    "dataset_path: cais/mmlu\n",
    "dataset_name: all\n",
    "description: \"MMLU dataset\"\n",
    "test_split: test\n",
    "fewshot_split: dev\n",
    "fewshot_config:\n",
    "  sampler: first_n\n",
    "num_fewshot: 5\n",
    "output_type: multiple_choice\n",
    "doc_to_text: \"{{question.strip()}}\\\\nA. {{choices[0]}}\\\\nB. {{choices[1]}}\\\\nC. {{choices[2]}}\\\\nD. {{choices[3]}}\\\\nAnswer:\"\n",
    "doc_to_choice: [\"A\", \"B\", \"C\", \"D\"]\n",
    "doc_to_target: answer\n",
    "metric_list:\n",
    "  - metric: acc\n",
    "    aggregation: mean\n",
    "    higher_is_better: true\n",
    "\"\"\"\n",
    "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n",
    "os.environ['TASKS'] = 'mmlu_all'\n"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "mJjo_A5tP-Td"
   },
   "source": [
    "Llama Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "IzP5nyP0Gwk8"
   },
   "outputs": [],
   "source": [
    "!lm_eval \\\n",
    "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
    "--tasks $TASKS \\\n",
    "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
    "--batch_size auto &> run.log\n",
    "#--limit 10 \\"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "uMoitxJkHerH"
   },
   "outputs": [],
   "source": [
    "hf_upload_folder(BASE_FOLDER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "oIACOAhDW5ow"
   },
   "outputs": [],
   "source": [
    "!lm_eval \\\n",
    "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
    "--tasks $TASKS \\\n",
    "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
    "--batch_size auto &> run.log\n",
    "#--limit 10 \\"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "eIUOqu5sHfkM"
   },
   "outputs": [],
   "source": [
    "hf_upload_folder(BASE_FOLDER)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "cFFYPzBIYGf7"
   },
   "outputs": [],
   "source": [
    "!lm_eval \\\n",
    "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
    "--tasks $TASKS \\\n",
    "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
    "--batch_size auto &> run.log\n",
    "#--limit 10 \\"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "xsL82Q4SHgMn"
   },
   "outputs": [],
   "source": [
    "hf_upload_folder(BASE_FOLDER)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {
    "id": "1fEX-49hQ-Be"
   },
   "source": [
    "Mistral Models"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "collapsed": true,
    "id": "ilu9_ulWTy3p"
   },
   "outputs": [],
   "source": [
    "!lm_eval \\\n",
    "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
    "--tasks $TASKS \\\n",
    "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n",
    "--batch_size auto &> run.log\n",
    "#--limit 10 \\"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "jE5r8gVDHhAz"
   },
   "outputs": [],
   "source": [
    "hf_upload_folder(BASE_FOLDER)"
   ]
  }
 ],
 "metadata": {
  "accelerator": "GPU",
  "colab": {
   "gpuType": "L4",
   "machine_shape": "hm",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}