{ "nbformat": 4, "nbformat_minor": 0, "metadata": { "colab": { "provenance": [], "gpuType": "T4", "machine_shape": "hm" }, "kernelspec": { "name": "python3", "display_name": "Python 3" }, "language_info": { "name": "python" }, "accelerator": "GPU" }, "cells": [ { "cell_type": "markdown", "source": [ "Initial setup" ], "metadata": { "id": "U8RTc2PmnX-v" } }, { "cell_type": "code", "source": [ "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt" ], "metadata": { "id": "kGW7vfRkrqHe" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "from datetime import datetime\n", "import os\n", "from huggingface_hub import login, upload_folder\n", "from google.colab import userdata\n", "import shutil\n", "\n", "HF_TOKEN = userdata.get('HF_TOKEN')\n", "login(HF_TOKEN, True)\n", "BASE_DATASET='pub'\n", "REPO_ID='flunardelli/llm-metaeval'\n", "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n", "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n", "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n", "#shutil.rmtree(BASE_FOLDER)\n", "os.makedirs(OUTPUT_FOLDER)\n", "os.makedirs(TASK_FOLDER)\n", "os.environ['HF_TOKEN'] = HF_TOKEN\n", "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n", "os.environ['TASK_FOLDER'] = TASK_FOLDER\n", "\n", "def hf_upload_folder(folder_path):\n", " upload_folder(\n", " folder_path=folder_path,\n", " path_in_repo=\"evals/\",\n", " repo_id=REPO_ID,\n", " token=HF_TOKEN,\n", " repo_type=\"dataset\"\n", " )\n", "\n", "def create_task(content, filename):\n", " filename_path = os.path.join(TASK_FOLDER,filename)\n", " with open(filename_path, \"w\") as f:\n", " f.write(content)" ], "metadata": { "id": "IHxFvAC4eSnW" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Create task for PUB all datasets" ], "metadata": { "id": "Jd2JwKZaPkNS" } }, { "cell_type": "code", "source": [ "YAML_template_pub_tasks = [\n", " (\"task_1\", 2),\n", " (\"task_2\", 5),\n", " (\"task_3\", 5),\n", " (\"task_4\", 3),\n", " (\"task_5\", 2),\n", " (\"task_6\", 2),\n", " (\"task_7\", 2),\n", " (\"task_8\", 2),\n", " (\"task_9\", 2),\n", " (\"task_10\", 3),\n", " (\"task_11\", 3),\n", " (\"task_12\", 2),\n", " (\"task_13\", 2),\n", " (\"task_14\", 4)\n", "]\n", "\n", "default_doc_to_text = \"{{pretext.strip()}}\\n {{options[0]}}\\n{{options[1]}}\\\\n{{options[2]}}\\\\n{{options[3]}}\\\\n{{options[4]}}\\\\nAnswer:\"\n", "\n", "\n", "YAML_template_pub_base = \"\"\"\n", "task: __task_name__\n", "dataset_path: flunardelli/PUB\n", "dataset_name: __dataset_name__\n", "description: \"PUB\"\n", "test_split: test\n", "fewshot_split: test\n", "fewshot_config:\n", " sampler: first_n\n", "num_fewshot: 5\n", "output_type: multiple_choice\n", "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n", "doc_to_choice: \"{{options}}\"\n", "doc_to_target: \"correct answer\"\n", "metric_list:\n", " - metric: acc\n", " aggregation: mean\n", " higher_is_better: true\n", "\"\"\"\n", "tasks = []\n", "for t in YAML_template_pub_tasks:\n", " dataset_name, num_choices = t\n", " task_name = f\"pub_{dataset_name}\"\n", " tasks.append(task_name)\n", " templace_choices = '\\n'.join([\"{{options[__i__]}}\".replace('__i__',str(i)) for i in range(num_choices)])\n", " template = (YAML_template_pub_base\n", " .replace('__options__',templace_choices)\n", " .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n", " )\n", " create_task(template, 
f\"pub_{dataset_name}.yaml\")\n", "\n", "os.environ['TASKS'] = ','.join(tasks)" ], "metadata": { "id": "xP0cC_sHih7C" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Llama Models" ], "metadata": { "id": "mJjo_A5tP-Td" } }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,revision=d0a2081ed47e20ce524e8bc5d132f3fad2f69ff0,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "NOwy6ZlY3Mw7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "v-7drt76r9wG" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,revision=392a143b624368100f77a3eafaa4a2468ba50a72,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "oIACOAhDW5ow" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "XowpCSOHr-qr" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,revision=62bd457b6fe961a42a631306577e622c83876cb6,trust_remote_code=False,dtype=bfloat16,parallelize=False \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "1Nxw4WNxZUyb" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "aNx_r4ZBr_ZW" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Mistral Models" ], "metadata": { "id": "1fEX-49hQ-Be" } }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "E3dBWV1V9C-O" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "NcGYz2g7sKe7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "!for i in $(echo $TASKS|tr ',' ' '); do lm_eval \\\n", "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,revision=b03e260818710044a2f088d88fab12bb220884fb,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n", "--tasks $i \\\n", "--include_path $TASK_FOLDER/. 
--output $OUTPUT_FOLDER --log_samples \\\n", "--batch_size auto; done &> run.log" ], "metadata": { "id": "LPqTo2z29RKx" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Upload evaluation results to the Hugging Face Hub" ], "metadata": { "id": "U8qh9BEbgBy7" } }, { "cell_type": "code", "source": [ "hf_upload_folder(BASE_FOLDER)" ], "metadata": { "id": "ZQl05b1rf83u" }, "execution_count": null, "outputs": [] },
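{ "cell_type": "markdown", "source": [ "Optional: a minimal sketch that summarizes accuracy from the lm-eval output files in `OUTPUT_FOLDER`. It assumes the default lm-evaluation-harness layout of one `results_*.json` file per run containing a top-level `results` mapping; adjust the glob pattern or metric keys if your version writes a different structure." ], "metadata": {} }, { "cell_type": "code", "source": [ "# Minimal sketch: collect accuracy numbers from lm-eval output files.\n", "# Assumes one results_*.json per run with a top-level \"results\" mapping\n", "# of task name -> metrics (the default lm-evaluation-harness layout).\n", "import glob, json\n", "\n", "result_files = sorted(glob.glob(os.path.join(OUTPUT_FOLDER, '**', 'results_*.json'), recursive=True))\n", "for path in result_files:\n", "    with open(path) as f:\n", "        results = json.load(f).get('results', {})\n", "    for task, metrics in results.items():\n", "        acc = metrics.get('acc,none', metrics.get('acc'))\n", "        print(f\"{os.path.relpath(path, OUTPUT_FOLDER)} :: {task}: acc={acc}\")" ], "metadata": {}, "execution_count": null, "outputs": [] } ] }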