{
  "nbformat": 4,
  "nbformat_minor": 0,
  "metadata": {
    "colab": { "provenance": [], "gpuType": "T4" },
    "kernelspec": { "name": "python3", "display_name": "Python 3" },
    "language_info": { "name": "python" },
    "accelerator": "GPU"
  },
  "cells": [
    {
      "cell_type": "markdown",
      "source": [ "Initial setup" ],
      "metadata": { "id": "U8RTc2PmnX-v" }
    },
    {
      "cell_type": "code",
      "source": [
        "# %pip (not !pip) so the install targets this kernel's environment.\n",
        "%pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
      ],
      "metadata": { "id": "kGW7vfRkrqHe" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "from huggingface_hub import notebook_login\n",
        "notebook_login()"
      ],
      "metadata": { "id": "2I850FIsCVNw" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [ "Create tasks for all PUB datasets" ],
      "metadata": { "id": "Jd2JwKZaPkNS" }
    },
    {
      "cell_type": "code",
      "source": [
        "# PUB dataset subsets and the number of answer options each one provides.\n",
        "PUB_TASKS = [\n",
        "    (\"task_1\", 2),\n",
        "    (\"task_2\", 5),\n",
        "    (\"task_3\", 5),\n",
        "    (\"task_4\", 3),\n",
        "    (\"task_5\", 2),\n",
        "    (\"task_6\", 2),\n",
        "    (\"task_7\", 2),\n",
        "    (\"task_8\", 2),\n",
        "    (\"task_9\", 2),\n",
        "    (\"task_10\", 3),\n",
        "    (\"task_11\", 3),\n",
        "    (\"task_12\", 2),\n",
        "    (\"task_13\", 2),\n",
        "    (\"task_14\", 4)\n",
        "]\n",
        "\n",
        "# lm-eval task config template; __placeholders__ are filled in per dataset below.\n",
        "YAML_template_pub_base = \"\"\"\n",
        "task: __task_name__\n",
        "dataset_path: flunardelli/PUB\n",
        "dataset_name: __dataset_name__\n",
        "description: \"PUB\"\n",
        "test_split: test\n",
        "fewshot_split: test\n",
        "fewshot_config:\n",
        "  sampler: first_n\n",
        "num_fewshot: 10\n",
        "output_type: multiple_choice\n",
        "doc_to_text: \"{{pretext.strip()}}\\n Options:\\n__options__\\nAnswer:\"\n",
        "doc_to_choice: \"{{options}}\"\n",
        "doc_to_target: \"correct answer\"\n",
        "metric_list:\n",
        "  - metric: acc\n",
        "    aggregation: mean\n",
        "    higher_is_better: true\n",
        "  - metric: acc_norm\n",
        "    aggregation: mean\n",
        "    higher_is_better: true\n",
        "\"\"\"\n",
        "\n",
        "tasks = []\n",
        "for dataset_name, num_choices in PUB_TASKS:\n",
        "    task_name = f\"pub_{dataset_name}\"\n",
        "    tasks.append(task_name)\n",
        "    # One \"{{options[i]}}\" line per answer choice of this dataset.\n",
        "    template_choices = '\\n'.join(f\"{{{{options[{i}]}}}}\" for i in range(num_choices))\n",
        "    template = (YAML_template_pub_base\n",
        "        .replace('__options__', template_choices)\n",
        "        .replace('__dataset_name__', dataset_name)\n",
        "        .replace('__task_name__', task_name))\n",
        "    with open(f\"{task_name}.yaml\", \"w\") as f:\n",
        "        f.write(template)\n",
        "\n",
        "# Single source of truth for the task list; the lm_eval cells below\n",
        "# interpolate this value instead of repeating a hard-coded list.\n",
        "TASKS_CSV = ','.join(tasks)\n",
        "TASKS_CSV"
      ],
      "metadata": { "id": "xP0cC_sHih7C" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [ "Llama Models" ],
      "metadata": { "id": "mJjo_A5tP-Td" }
    },
    {
      "cell_type": "code",
      "source": [
        "!lm_eval --model hf \\\n",
        "  --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct \\\n",
        "  --include_path ./ \\\n",
        "  --tasks {TASKS_CSV} \\\n",
        "  --output output/pub/ \\\n",
        "  --use_cache cache \\\n",
        "  --device cuda:0 \\\n",
        "  --log_samples"
      ],
      "metadata": { "id": "IzP5nyP0Gwk8" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!lm_eval --model hf \\\n",
        "  --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct \\\n",
        "  --include_path ./ \\\n",
        "  --tasks {TASKS_CSV} \\\n",
        "  --output output/pub/ \\\n",
        "  --use_cache cache \\\n",
        "  --device cuda:0 \\\n",
        "  --log_samples"
      ],
      "metadata": { "id": "oIACOAhDW5ow" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!lm_eval --model hf \\\n",
        "  --model_args pretrained=meta-llama/Meta-Llama-3-8B \\\n",
        "  --include_path ./ \\\n",
        "  --tasks {TASKS_CSV} \\\n",
        "  --output output/pub/ \\\n",
        "  --use_cache cache \\\n",
        "  --device cuda:0 \\\n",
        "  --log_samples"
      ],
      "metadata": { "id": "cFFYPzBIYGf7" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [ "Mistral Models" ],
      "metadata": { "id": "1fEX-49hQ-Be" }
    },
    {
      "cell_type": "code",
      "source": [
        "!lm_eval --model hf \\\n",
        "  --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1 \\\n",
        "  --include_path ./ \\\n",
        "  --tasks {TASKS_CSV} \\\n",
        "  --output output/pub/ \\\n",
        "  --use_cache cache \\\n",
        "  --device cuda:0 \\\n",
        "  --log_samples"
      ],
      "metadata": { "id": "1Nxw4WNxZUyb" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "code",
      "source": [
        "!lm_eval --model hf \\\n",
        "  --model_args pretrained=mistralai/Mistral-7B-v0.1 \\\n",
        "  --include_path ./ \\\n",
        "  --tasks {TASKS_CSV} \\\n",
        "  --output output/pub/ \\\n",
        "  --use_cache cache \\\n",
        "  --device cuda:0 \\\n",
        "  --log_samples"
      ],
      "metadata": { "id": "3cHI2qxN2fJ0" },
      "execution_count": null,
      "outputs": []
    },
    {
      "cell_type": "markdown",
      "source": [],
      "metadata": { "id": "ZUTPHnV0kMB1" }
    }
  ]
}