flunardelli
/

llm-metaeval

Model card Files Files and versions Community

flunardelli committed on Nov 24, 2024

Commit

8e20010

verified ·

1 Parent(s): b8ee329

Update llm_metaeval_eval_harness_mmlu.ipynb

Browse files

Files changed (1) hide show

llm_metaeval_eval_harness_mmlu.ipynb +148 -119

llm_metaeval_eval_harness_mmlu.ipynb CHANGED Viewed

@@ -1,70 +1,90 @@
 {
-  "nbformat": 4,
-  "nbformat_minor": 0,
-  "metadata": {
-    "colab": {
-      "provenance": [],
-      "gpuType": "T4"
-    },
-    "kernelspec": {
-      "name": "python3",
-      "display_name": "Python 3"
-    },
-    "language_info": {
-      "name": "python"
-    },
-    "accelerator": "GPU"
-  },
   "cells": [
     {
       "cell_type": "markdown",
-      "source": [
-        "Initial setup"
-      ],
       "metadata": {
         "id": "U8RTc2PmnX-v"
-      }
     },
     {
       "cell_type": "code",
-      "source": [
-        "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
-      ],
       "metadata": {
         "id": "kGW7vfRkrqHe"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
-      "source": [
-        "from huggingface_hub import notebook_login\n",
-        "notebook_login()"
-      ],
       "metadata": {
         "id": "2I850FIsCVNw"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "Create task for MMLU all datasets"
-      ],
       "metadata": {
         "id": "Jd2JwKZaPkNS"
-      }
     },
     {
       "cell_type": "code",
       "source": [
         "YAML_mmlu_en_us_string = \"\"\"\n",
         "task: mmlu_all\n",
         "dataset_path: cais/mmlu\n",
         "dataset_name: all\n",
-        "description: \"MMLU dataset in English\"\n",
         "test_split: test\n",
         "fewshot_split: dev\n",
         "fewshot_config:\n",
@@ -81,134 +101,143 @@
         "    aggregation: mean\n",
         "    higher_is_better: true\n",
         "\"\"\"\n",
-        "with open(\"mmlu_en_us.yaml\", \"w\") as f:\n",
-        "    f.write(YAML_mmlu_en_us_string)"
-      ],
-      "metadata": {
-        "id": "xP0cC_sHih7C"
-      },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "markdown",
-      "source": [
-        "Llama Models"
-      ],
       "metadata": {
         "id": "mJjo_A5tP-Td"
-      }
     },
     {
       "cell_type": "code",
-      "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks mmlu_all \\\n",
-        "  --output output/mmlu/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10\n"
-      ],
       "metadata": {
         "id": "IzP5nyP0Gwk8"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
-      "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks mmlu_all \\\n",
-        "  --output output/mmlu/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
-      ],
       "metadata": {
         "id": "oIACOAhDW5ow"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1  \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks mmlu_all \\\n",
-        "  --output output/mmlu/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
-      ],
       "metadata": {
-        "id": "1Nxw4WNxZUyb"
       },
-      "execution_count": null,
-      "outputs": []
     },
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Meta-Llama-3-8B \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks mmlu_all \\\n",
-        "  --output output/mmlu/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
-        "id": "cFFYPzBIYGf7"
       },
       "execution_count": null,
       "outputs": []
     },
     {
-      "cell_type": "markdown",
       "source": [
-        "Mistral Models"
-      ],
       "metadata": {
-        "id": "1fEX-49hQ-Be"
-      }
     },
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=mistralai/Mistral-7B-v0.1 \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks mmlu_all \\\n",
-        "  --output output/mmlu/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
-        "id": "3cHI2qxN2fJ0"
       },
       "execution_count": null,
       "outputs": []
     },
-    {
-      "cell_type": "markdown",
-      "source": [],
-      "metadata": {
-        "id": "ZUTPHnV0kMB1"
-      }
     }
-  ]
 }

 {
   "cells": [
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "U8RTc2PmnX-v"
+      },
+      "source": [
+        "Initial setup"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "kGW7vfRkrqHe"
       },
+      "outputs": [],
+      "source": [
+        "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "2I850FIsCVNw"
       },
+      "outputs": [],
+      "source": [
+        "from datetime import datetime\n",
+        "import os\n",
+        "from huggingface_hub import login, upload_folder\n",
+        "from google.colab import userdata\n",
+        "import shutil\n",
+        "\n",
+        "HF_TOKEN = userdata.get('HUGGING_FACE_WRITE_TOKEN')\n",
+        "login(HF_TOKEN, True)\n",
+        "BASE_DATASET='mmlu'\n",
+        "REPO_ID='flunardelli/llm-metaeval'\n",
+        "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
+        "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
+        "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
+        "#shutil.rmtree(BASE_FOLDER)\n",
+        "os.makedirs(OUTPUT_FOLDER)\n",
+        "os.makedirs(TASK_FOLDER)\n",
+        "os.environ['HF_TOKEN'] = HF_TOKEN\n",
+        "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
+        "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
+        "\n",
+        "def hf_upload_folder(folder_path):\n",
+        "  upload_folder(\n",
+        "      folder_path=folder_path,\n",
+        "      path_in_repo=\"evals/\",\n",
+        "      repo_id=REPO_ID,\n",
+        "      token=HF_TOKEN,\n",
+        "      repo_type=\"dataset\"\n",
+        "  )\n",
+        "\n",
+        "def create_task(content, filename):\n",
+        "  filename_path = os.path.join(TASK_FOLDER,filename)\n",
+        "  with open(filename_path, \"w\") as f:\n",
+        "    f.write(content)"
+      ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "Jd2JwKZaPkNS"
+      },
+      "source": [
+        "Create task for MMLU all datasets"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "xP0cC_sHih7C"
+      },
+      "outputs": [],
       "source": [
         "YAML_mmlu_en_us_string = \"\"\"\n",
         "task: mmlu_all\n",
         "dataset_path: cais/mmlu\n",
         "dataset_name: all\n",
+        "description: \"MMLU dataset\"\n",
         "test_split: test\n",
         "fewshot_split: dev\n",
         "fewshot_config:\n",
         "    aggregation: mean\n",
         "    higher_is_better: true\n",
         "\"\"\"\n",
+        "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n"
+      ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
         "id": "mJjo_A5tP-Td"
+      },
+      "source": [
+        "Llama Models"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "IzP5nyP0Gwk8"
       },
+      "outputs": [],
+      "source": [
+        "!accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
+        "--tasks mmlu_all \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 16\n",
+        "#--limit 10 \\"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "oIACOAhDW5ow"
       },
+      "outputs": [],
+      "source": [
+        "!accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
+        "--tasks mmlu_all \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 16\n",
+        "#--limit 10 \\"
+      ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "cFFYPzBIYGf7"
+      },
+      "outputs": [],
       "source": [
+        "!accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
+        "--tasks mmlu_all \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 16\n",
+        "#--limit 10 \\"
+      ]
+    },
+    {
+      "cell_type": "markdown",
       "metadata": {
+        "id": "1fEX-49hQ-Be"
       },
+      "source": [
+        "Mistral Models"
+      ]
     },
     {
       "cell_type": "code",
       "source": [
+        "!accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
+        "--tasks mmlu_all \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 16\n",
+        "#--limit 10 \\"
       ],
       "metadata": {
+        "id": "ilu9_ulWTy3p"
       },
       "execution_count": null,
       "outputs": []
     },
     {
+      "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "id": "3cHI2qxN2fJ0"
+      },
+      "outputs": [],
       "source": [
+        "!accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
+        "--tasks mmlu_all \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 16\n",
+        "#--limit 10 \\"
+      ]
+    },
+    {
+      "cell_type": "markdown",
       "metadata": {
+        "id": "ZUTPHnV0kMB1"
+      },
+      "source": [
+        "Save output results"
+      ]
     },
     {
       "cell_type": "code",
       "source": [
+        "hf_upload_folder(BASE_FOLDER)"
       ],
       "metadata": {
+        "id": "mGGdqBNBzFYL"
       },
       "execution_count": null,
       "outputs": []
+    }
+  ],
+  "metadata": {
+    "accelerator": "GPU",
+    "colab": {
+      "gpuType": "T4",
+      "provenance": [],
+      "machine_shape": "hm"
     },
+    "kernelspec": {
+      "display_name": "Python 3",
+      "name": "python3"
+    },
+    "language_info": {
+      "name": "python"
     }
+  },
+  "nbformat": 4,
+  "nbformat_minor": 0
 }