flunardelli
/

llm-metaeval

Model card Files Files and versions Community

flunardelli committed on Nov 25, 2024

Commit

5d9d806

verified ·

1 Parent(s): 8e20010

Upload 2 files

Browse files

Files changed (2) hide show

llm_metaeval_eval_harness_mmlu.ipynb +76 -28
llm_metaeval_eval_harness_pub.ipynb +133 -77

llm_metaeval_eval_harness_mmlu.ipynb CHANGED Viewed

@@ -34,7 +34,7 @@
         "from google.colab import userdata\n",
         "import shutil\n",
         "\n",
-        "HF_TOKEN = userdata.get('HUGGING_FACE_WRITE_TOKEN')\n",
         "login(HF_TOKEN, True)\n",
         "BASE_DATASET='mmlu'\n",
         "REPO_ID='flunardelli/llm-metaeval'\n",
@@ -101,7 +101,8 @@
         "    aggregation: mean\n",
         "    higher_is_better: true\n",
         "\"\"\"\n",
-        "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n"
       ]
     },
     {
@@ -117,50 +118,92 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
         "id": "IzP5nyP0Gwk8"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
-        "--tasks mmlu_all \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
         "id": "oIACOAhDW5ow"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
-        "--tasks mmlu_all \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
         "id": "cFFYPzBIYGf7"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
-        "--tasks mmlu_all \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
     {
       "cell_type": "markdown",
       "metadata": {
@@ -172,16 +215,30 @@
     },
     {
       "cell_type": "code",
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
-        "--tasks mmlu_all \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ],
       "metadata": {
-        "id": "ilu9_ulWTy3p"
       },
       "execution_count": null,
       "outputs": []
@@ -194,41 +251,32 @@
       },
       "outputs": [],
       "source": [
-        "!accelerate launch -m lm_eval \\\n",
-        "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
-        "--tasks mmlu_all \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
-        "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
-    {
-      "cell_type": "markdown",
-      "metadata": {
-        "id": "ZUTPHnV0kMB1"
-      },
-      "source": [
-        "Save output results"
-      ]
-    },
     {
       "cell_type": "code",
-      "source": [
-        "hf_upload_folder(BASE_FOLDER)"
-      ],
       "metadata": {
         "id": "mGGdqBNBzFYL"
       },
-      "execution_count": null,
-      "outputs": []
     }
   ],
   "metadata": {
     "accelerator": "GPU",
     "colab": {
-      "gpuType": "T4",
-      "provenance": [],
-      "machine_shape": "hm"
     },
     "kernelspec": {
       "display_name": "Python 3",

         "from google.colab import userdata\n",
         "import shutil\n",
         "\n",
+        "HF_TOKEN = userdata.get('HF_TOKEN')\n",
         "login(HF_TOKEN, True)\n",
         "BASE_DATASET='mmlu'\n",
         "REPO_ID='flunardelli/llm-metaeval'\n",
         "    aggregation: mean\n",
         "    higher_is_better: true\n",
         "\"\"\"\n",
+        "create_task(YAML_mmlu_en_us_string, 'mmlu_en_us.yaml')\n",
+        "os.environ['TASKS'] = 'mmlu_all'\n"
       ]
     },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "colab": {
+          "background_save": true
+        },
         "id": "IzP5nyP0Gwk8"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
+        "--tasks $TASKS \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "uMoitxJkHerH"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "colab": {
+          "background_save": true
+        },
         "id": "oIACOAhDW5ow"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
+        "--tasks $TASKS \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "eIUOqu5sHfkM"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "colab": {
+          "background_save": true
+        },
         "id": "cFFYPzBIYGf7"
       },
       "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
+        "--tasks $TASKS \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
       ]
     },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "xsL82Q4SHgMn"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
     {
       "cell_type": "markdown",
       "metadata": {
     },
     {
       "cell_type": "code",
+      "execution_count": null,
+      "metadata": {
+        "colab": {
+          "background_save": true
+        },
+        "id": "ilu9_ulWTy3p"
+      },
+      "outputs": [],
       "source": [
         "!accelerate launch -m lm_eval \\\n",
         "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
+        "--tasks $TASKS \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
         "--batch_size 16\n",
         "#--limit 10 \\"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
       ],
       "metadata": {
+        "id": "jE5r8gVDHhAz"
       },
       "execution_count": null,
       "outputs": []
       },
       "outputs": [],
       "source": [
+        "!accelerate launch --multi_gpu --num_processes 4 -m lm_eval  \\\n",
+        "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1 \\\n",
+        "--tasks $TASKS \\\n",
         "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8\n",
         "#--limit 10 \\"
       ]
     },
     {
       "cell_type": "code",
+      "execution_count": null,
       "metadata": {
         "id": "mGGdqBNBzFYL"
       },
+      "outputs": [],
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ]
     }
   ],
   "metadata": {
     "accelerator": "GPU",
     "colab": {
+      "gpuType": "L4",
+      "machine_shape": "hm",
+      "provenance": []
     },
     "kernelspec": {
       "display_name": "Python 3",

llm_metaeval_eval_harness_pub.ipynb CHANGED Viewed

@@ -4,7 +4,8 @@
   "metadata": {
     "colab": {
       "provenance": [],
-      "gpuType": "T4"
     },
     "kernelspec": {
       "name": "python3",
@@ -39,11 +40,42 @@
     {
       "cell_type": "code",
       "source": [
-        "from huggingface_hub import notebook_login\n",
-        "notebook_login()"
       ],
       "metadata": {
-        "id": "2I850FIsCVNw"
       },
       "execution_count": null,
       "outputs": []
@@ -112,35 +144,15 @@
         "              .replace('__options__',templace_choices)\n",
         "              .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n",
         "              )\n",
-        "  with open(f\"pub_{dataset_name}.yaml\", \"w\") as f:\n",
-        "    f.write(template)\n",
         "\n",
-        "','.join(tasks)"
       ],
       "metadata": {
-        "id": "xP0cC_sHih7C",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 35
-        },
-        "outputId": "fcf3ed9e-1422-47f3-e234-016435c8b212"
       },
-      "execution_count": 1,
-      "outputs": [
-        {
-          "output_type": "execute_result",
-          "data": {
-            "text/plain": [
-              "'pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14'"
-            ],
-            "application/vnd.google.colaboratory.intrinsic+json": {
-              "type": "string"
-            }
-          },
-          "metadata": {},
-          "execution_count": 1
-        }
-      ]
     },
     {
       "cell_type": "markdown",
@@ -154,18 +166,14 @@
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14 \\\n",
-        "  --output output/pub/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10\n"
       ],
       "metadata": {
-        "id": "IzP5nyP0Gwk8"
       },
       "execution_count": null,
       "outputs": []
@@ -173,15 +181,22 @@
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14 \\\n",
-        "  --output output/pub/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
         "id": "oIACOAhDW5ow"
@@ -192,15 +207,22 @@
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1  \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14 \\\n",
-        "  --output output/pub/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
         "id": "1Nxw4WNxZUyb"
@@ -211,18 +233,10 @@
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=meta-llama/Meta-Llama-3-8B \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14 \\\n",
-        "  --output output/pub/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
-        "id": "cFFYPzBIYGf7"
       },
       "execution_count": null,
       "outputs": []
@@ -239,18 +253,60 @@
     {
       "cell_type": "code",
       "source": [
-        "!lm_eval --model hf \\\n",
-        "  --model_args pretrained=mistralai/Mistral-7B-v0.1 \\\n",
-        "  --include_path ./ \\\n",
-        "  --tasks pub_task_1,pub_task_2,pub_task_3,pub_task_4,pub_task_5,pub_task_6,pub_task_7,pub_task_8,pub_task_9,pub_task_10,pub_task_11,pub_task_12,pub_task_13,pub_task_14 \\\n",
-        "  --output output/pub/ \\\n",
-        "  --use_cache cache \\\n",
-        "  --device cuda:0 \\\n",
-        "  --log_samples\n",
-        "  #  --limit 10"
       ],
       "metadata": {
-        "id": "3cHI2qxN2fJ0"
       },
       "execution_count": null,
       "outputs": []

   "metadata": {
     "colab": {
       "provenance": [],
+      "gpuType": "L4",
+      "machine_shape": "hm"
     },
     "kernelspec": {
       "name": "python3",
     {
       "cell_type": "code",
       "source": [
+        "from datetime import datetime\n",
+        "import os\n",
+        "from huggingface_hub import login, upload_folder\n",
+        "from google.colab import userdata\n",
+        "import shutil\n",
+        "\n",
+        "# Authenticate with the Hugging Face Hub using the token kept in Colab user data.\n",
+        "HF_TOKEN = userdata.get('HF_TOKEN')\n",
+        "login(HF_TOKEN, True)\n",
+        "BASE_DATASET='pub'\n",
+        "REPO_ID='flunardelli/llm-metaeval'\n",
+        "BASE_FOLDER=f\"/content/{BASE_DATASET}/\"#{datetime.now().strftime('%Y-%m-%dT%H-%M-%S')}\n",
+        "OUTPUT_FOLDER=os.path.join(BASE_FOLDER,'output')\n",
+        "TASK_FOLDER=os.path.join(BASE_FOLDER,'tasks')\n",
+        "#shutil.rmtree(BASE_FOLDER)\n",
+        "# exist_ok=True keeps this cell re-runnable: without it a second run raises\n",
+        "# FileExistsError because the folders are not removed above.\n",
+        "os.makedirs(OUTPUT_FOLDER, exist_ok=True)\n",
+        "os.makedirs(TASK_FOLDER, exist_ok=True)\n",
+        "# Export to the environment so the `!` shell cells can read $OUTPUT_FOLDER and $TASK_FOLDER.\n",
+        "os.environ['HF_TOKEN'] = HF_TOKEN\n",
+        "os.environ['OUTPUT_FOLDER'] = OUTPUT_FOLDER\n",
+        "os.environ['TASK_FOLDER'] = TASK_FOLDER\n",
+        "\n",
+        "def hf_upload_folder(folder_path):\n",
+        "  \"\"\"Upload the contents of `folder_path` under `evals/` in the REPO_ID dataset repo.\"\"\"\n",
+        "  upload_options = dict(\n",
+        "      repo_id=REPO_ID,\n",
+        "      repo_type=\"dataset\",\n",
+        "      path_in_repo=\"evals/\",\n",
+        "      token=HF_TOKEN,\n",
+        "  )\n",
+        "  upload_folder(folder_path=folder_path, **upload_options)\n",
+        "\n",
+        "def create_task(content, filename):\n",
+        "  \"\"\"Write `content` to the file named `filename` inside TASK_FOLDER.\"\"\"\n",
+        "  task_path = os.path.join(TASK_FOLDER, filename)\n",
+        "  with open(task_path, \"w\") as task_file:\n",
+        "    task_file.write(content)"
       ],
       "metadata": {
+        "id": "IHxFvAC4eSnW"
       },
       "execution_count": null,
       "outputs": []
         "              .replace('__options__',templace_choices)\n",
         "              .replace('__dataset_name__',dataset_name).replace('__task_name__',task_name)\n",
         "              )\n",
+        "  create_task(template, f\"pub_{dataset_name}.yaml\")\n",
         "\n",
+        "os.environ['TASKS'] = ','.join(tasks)"
       ],
       "metadata": {
+        "id": "xP0cC_sHih7C"
       },
+      "execution_count": null,
+      "outputs": []
     },
     {
       "cell_type": "markdown",
     {
       "cell_type": "code",
       "source": [
+        "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Llama-3.2-1B-Instruct,parallelize=True \\\n",
+        "--tasks $i \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8; done"
       ],
       "metadata": {
+        "id": "NOwy6ZlY3Mw7"
       },
       "execution_count": null,
       "outputs": []
     {
       "cell_type": "code",
       "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "v-7drt76r9wG"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Llama-3.2-3B-Instruct,parallelize=True \\\n",
+        "--tasks $i \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8; done"
       ],
       "metadata": {
         "id": "oIACOAhDW5ow"
     {
       "cell_type": "code",
       "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "XowpCSOHr-qr"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=meta-llama/Meta-Llama-3-8B,parallelize=True \\\n",
+        "--tasks $i \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8; done"
       ],
       "metadata": {
         "id": "1Nxw4WNxZUyb"
     {
       "cell_type": "code",
       "source": [
+        "hf_upload_folder(BASE_FOLDER)"
       ],
       "metadata": {
+        "id": "aNx_r4ZBr_ZW"
       },
       "execution_count": null,
       "outputs": []
     {
       "cell_type": "code",
       "source": [
+        "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,parallelize=True \\\n",
+        "--tasks $i \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8; done"
+      ],
+      "metadata": {
+        "id": "E3dBWV1V9C-O"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
+      ],
+      "metadata": {
+        "id": "NcGYz2g7sKe7"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "!for i in $(echo $TASKS|tr ',' ' '); do accelerate launch -m lm_eval \\\n",
+        "--model hf --model_args pretrained=mistralai/Mixtral-8x22B-v0.1,parallelize=True \\\n",
+        "--tasks $i \\\n",
+        "--include_path $TASK_FOLDER/. --output $OUTPUT_FOLDER --use_cache cache --log_samples \\\n",
+        "--batch_size 8; done"
+      ],
+      "metadata": {
+        "id": "LPqTo2z29RKx"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "Save output results"
+      ],
+      "metadata": {
+        "id": "U8qh9BEbgBy7"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "hf_upload_folder(BASE_FOLDER)"
       ],
       "metadata": {
+        "id": "ZQl05b1rf83u"
       },
       "execution_count": null,
       "outputs": []