flunardelli
/

llm-metaeval

Model card Files Files and versions Community

flunardelli commited on Dec 5, 2024

Commit

bba9479

verified ·

1 Parent(s): 0985eef

Upload 3 files

Browse files

Files changed (2) hide show

llm_metaeval_eval_harness_mmlu.ipynb +12 -1346
llm_metaeval_eval_harness_results.ipynb +0 -0

llm_metaeval_eval_harness_mmlu.ipynb CHANGED Viewed

@@ -13,189 +13,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "id": "kGW7vfRkrqHe",
-        "outputId": "a51e7052-6e2d-431e-a46b-63d92254c8cc"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "Collecting lm_eval@ git+https://github.com/EleutherAI/[email protected] (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Cloning https://github.com/EleutherAI/lm-evaluation-harness.git (to revision v0.4.6) to /tmp/pip-install-pwn3euso/lm-eval_15e9ff6cb88d46aa9b65c222332a3c3b\n",
-            "  Running command git clone --filter=blob:none --quiet https://github.com/EleutherAI/lm-evaluation-harness.git /tmp/pip-install-pwn3euso/lm-eval_15e9ff6cb88d46aa9b65c222332a3c3b\n",
-            "  Running command git checkout -q 9d36354e6422db94c374dc476c093b1495370983\n",
-            "  Resolved https://github.com/EleutherAI/lm-evaluation-harness.git to commit 9d36354e6422db94c374dc476c093b1495370983\n",
-            "  Installing build dependencies ... \u001b[?25l\u001b[?25hdone\n",
-            "  Getting requirements to build wheel ... \u001b[?25l\u001b[?25hdone\n",
-            "  Preparing metadata (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: accelerate==1.1.1 in /usr/local/lib/python3.10/dist-packages (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.1.1)\n",
-            "Collecting bitsandbytes==0.44.1 (from -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 2))\n",
-            "  Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl.metadata (3.5 kB)\n",
-            "Requirement already satisfied: huggingface-hub>=0.21.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (0.26.2)\n",
-            "Requirement already satisfied: numpy<3.0.0,>=1.17 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.26.4)\n",
-            "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (24.2)\n",
-            "Requirement already satisfied: psutil in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (5.9.5)\n",
-            "Requirement already satisfied: pyyaml in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (6.0.2)\n",
-            "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (0.4.5)\n",
-            "Requirement already satisfied: torch>=1.10.0 in /usr/local/lib/python3.10/dist-packages (from accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (2.5.1+cu121)\n",
-            "Collecting evaluate (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading evaluate-0.4.3-py3-none-any.whl.metadata (9.2 kB)\n",
-            "Collecting datasets>=2.16.0 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading datasets-3.1.0-py3-none-any.whl.metadata (20 kB)\n",
-            "Collecting jsonlines (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading jsonlines-4.0.0-py3-none-any.whl.metadata (1.6 kB)\n",
-            "Requirement already satisfied: numexpr in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.10.1)\n",
-            "Requirement already satisfied: peft>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.13.2)\n",
-            "Collecting pybind11>=2.6.2 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading pybind11-2.13.6-py3-none-any.whl.metadata (9.5 kB)\n",
-            "Collecting pytablewriter (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading pytablewriter-1.2.0-py3-none-any.whl.metadata (37 kB)\n",
-            "Collecting rouge-score>=0.0.4 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading rouge_score-0.1.2.tar.gz (17 kB)\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting sacrebleu>=1.5.0 (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading sacrebleu-2.4.3-py3-none-any.whl.metadata (51 kB)\n",
-            "\u001b[2K     \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m51.8/51.8 kB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hRequirement already satisfied: scikit-learn>=0.24.1 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.5.2)\n",
-            "Collecting sqlitedict (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading sqlitedict-2.1.0.tar.gz (21 kB)\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Collecting tqdm-multiprocess (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading tqdm_multiprocess-0.0.11-py3-none-any.whl.metadata (5.7 kB)\n",
-            "Requirement already satisfied: transformers>=4.1 in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.46.2)\n",
-            "Collecting zstandard (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (3.0 kB)\n",
-            "Collecting dill (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading dill-0.3.9-py3-none-any.whl.metadata (10 kB)\n",
-            "Collecting word2number (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading word2number-1.1.zip (9.7 kB)\n",
-            "  Preparing metadata (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "Requirement already satisfied: more_itertools in /usr/local/lib/python3.10/dist-packages (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (10.5.0)\n",
-            "Requirement already satisfied: filelock in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.16.1)\n",
-            "Requirement already satisfied: pyarrow>=15.0.0 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (17.0.0)\n",
-            "Collecting dill (from lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading dill-0.3.8-py3-none-any.whl.metadata (10 kB)\n",
-            "Requirement already satisfied: pandas in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.2.2)\n",
-            "Requirement already satisfied: requests>=2.32.2 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.32.3)\n",
-            "Requirement already satisfied: tqdm>=4.66.3 in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.66.6)\n",
-            "Collecting xxhash (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (12 kB)\n",
-            "Collecting multiprocess<0.70.17 (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading multiprocess-0.70.16-py310-none-any.whl.metadata (7.2 kB)\n",
-            "Collecting fsspec<=2024.9.0,>=2023.1.0 (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading fsspec-2024.9.0-py3-none-any.whl.metadata (11 kB)\n",
-            "Requirement already satisfied: aiohttp in /usr/local/lib/python3.10/dist-packages (from datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.11.2)\n",
-            "Requirement already satisfied: typing-extensions>=3.7.4.3 in /usr/local/lib/python3.10/dist-packages (from huggingface-hub>=0.21.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (4.12.2)\n",
-            "Requirement already satisfied: absl-py in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.4.0)\n",
-            "Requirement already satisfied: nltk in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.9.1)\n",
-            "Requirement already satisfied: six>=1.14.0 in /usr/local/lib/python3.10/dist-packages (from rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.16.0)\n",
-            "Collecting portalocker (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading portalocker-3.0.0-py3-none-any.whl.metadata (8.5 kB)\n",
-            "Requirement already satisfied: regex in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.9.11)\n",
-            "Requirement already satisfied: tabulate>=0.8.9 in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.9.0)\n",
-            "Collecting colorama (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading colorama-0.4.6-py2.py3-none-any.whl.metadata (17 kB)\n",
-            "Requirement already satisfied: lxml in /usr/local/lib/python3.10/dist-packages (from sacrebleu>=1.5.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (5.3.0)\n",
-            "Requirement already satisfied: scipy>=1.6.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.13.1)\n",
-            "Requirement already satisfied: joblib>=1.2.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.4.2)\n",
-            "Requirement already satisfied: threadpoolctl>=3.1.0 in /usr/local/lib/python3.10/dist-packages (from scikit-learn>=0.24.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.5.0)\n",
-            "Requirement already satisfied: networkx in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.4.2)\n",
-            "Requirement already satisfied: jinja2 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.1.4)\n",
-            "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.10/dist-packages (from torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.13.1)\n",
-            "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.10/dist-packages (from sympy==1.13.1->torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (1.3.0)\n",
-            "Requirement already satisfied: tokenizers<0.21,>=0.20 in /usr/local/lib/python3.10/dist-packages (from transformers>=4.1->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.20.3)\n",
-            "Requirement already satisfied: attrs>=19.2.0 in /usr/local/lib/python3.10/dist-packages (from jsonlines->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (24.2.0)\n",
-            "Requirement already satisfied: setuptools>=38.3.0 in /usr/local/lib/python3.10/dist-packages (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (75.1.0)\n",
-            "Collecting DataProperty<2,>=1.0.1 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading DataProperty-1.0.1-py3-none-any.whl.metadata (11 kB)\n",
-            "Collecting mbstrdecoder<2,>=1.0.0 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading mbstrdecoder-1.1.3-py3-none-any.whl.metadata (4.0 kB)\n",
-            "Collecting pathvalidate<4,>=2.3.0 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading pathvalidate-3.2.1-py3-none-any.whl.metadata (12 kB)\n",
-            "Collecting tabledata<2,>=1.3.1 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading tabledata-1.3.3-py3-none-any.whl.metadata (3.7 kB)\n",
-            "Collecting tcolorpy<1,>=0.0.5 (from pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading tcolorpy-0.1.6-py3-none-any.whl.metadata (6.4 kB)\n",
-            "Collecting typepy<2,>=1.3.2 (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3))\n",
-            "  Downloading typepy-1.3.2-py3-none-any.whl.metadata (9.3 kB)\n",
-            "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.4.3)\n",
-            "Requirement already satisfied: aiosignal>=1.1.2 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.3.1)\n",
-            "Requirement already satisfied: frozenlist>=1.1.1 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.5.0)\n",
-            "Requirement already satisfied: multidict<7.0,>=4.5 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (6.1.0)\n",
-            "Requirement already satisfied: propcache>=0.2.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (0.2.0)\n",
-            "Requirement already satisfied: yarl<2.0,>=1.17.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (1.17.2)\n",
-            "Requirement already satisfied: async-timeout<6.0,>=4.0 in /usr/local/lib/python3.10/dist-packages (from aiohttp->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (4.0.3)\n",
-            "Requirement already satisfied: chardet<6,>=3.0.4 in /usr/local/lib/python3.10/dist-packages (from mbstrdecoder<2,>=1.0.0->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (5.2.0)\n",
-            "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.4.0)\n",
-            "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (3.10)\n",
-            "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.2.3)\n",
-            "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.10/dist-packages (from requests>=2.32.2->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.8.30)\n",
-            "Requirement already satisfied: python-dateutil<3.0.0,>=2.8.0 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2.8.2)\n",
-            "Requirement already satisfied: pytz>=2018.9 in /usr/local/lib/python3.10/dist-packages (from typepy[datetime]<2,>=1.3.2->pytablewriter->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.2)\n",
-            "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.10/dist-packages (from jinja2->torch>=1.10.0->accelerate==1.1.1->-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 1)) (3.0.2)\n",
-            "Requirement already satisfied: click in /usr/local/lib/python3.10/dist-packages (from nltk->rouge-score>=0.0.4->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (8.1.7)\n",
-            "Requirement already satisfied: tzdata>=2022.7 in /usr/local/lib/python3.10/dist-packages (from pandas->datasets>=2.16.0->lm_eval@ git+https://github.com/EleutherAI/[email protected]>-r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt (line 3)) (2024.2)\n",
-            "Downloading bitsandbytes-0.44.1-py3-none-manylinux_2_24_x86_64.whl (122.4 MB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m122.4/122.4 MB\u001b[0m \u001b[31m18.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading datasets-3.1.0-py3-none-any.whl (480 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m480.6/480.6 kB\u001b[0m \u001b[31m39.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading dill-0.3.8-py3-none-any.whl (116 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m116.3/116.3 kB\u001b[0m \u001b[31m12.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading evaluate-0.4.3-py3-none-any.whl (84 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m84.0/84.0 kB\u001b[0m \u001b[31m8.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading pybind11-2.13.6-py3-none-any.whl (243 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m243.3/243.3 kB\u001b[0m \u001b[31m21.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading sacrebleu-2.4.3-py3-none-any.whl (103 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m104.0/104.0 kB\u001b[0m \u001b[31m11.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading jsonlines-4.0.0-py3-none-any.whl (8.7 kB)\n",
-            "Downloading pytablewriter-1.2.0-py3-none-any.whl (111 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m111.1/111.1 kB\u001b[0m \u001b[31m12.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading tqdm_multiprocess-0.0.11-py3-none-any.whl (9.8 kB)\n",
-            "Downloading zstandard-0.23.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.4 MB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m5.4/5.4 MB\u001b[0m \u001b[31m116.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading DataProperty-1.0.1-py3-none-any.whl (27 kB)\n",
-            "Downloading fsspec-2024.9.0-py3-none-any.whl (179 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m179.3/179.3 kB\u001b[0m \u001b[31m17.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading mbstrdecoder-1.1.3-py3-none-any.whl (7.8 kB)\n",
-            "Downloading multiprocess-0.70.16-py310-none-any.whl (134 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m134.8/134.8 kB\u001b[0m \u001b[31m14.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hDownloading pathvalidate-3.2.1-py3-none-any.whl (23 kB)\n",
-            "Downloading tabledata-1.3.3-py3-none-any.whl (11 kB)\n",
-            "Downloading tcolorpy-0.1.6-py3-none-any.whl (8.1 kB)\n",
-            "Downloading typepy-1.3.2-py3-none-any.whl (31 kB)\n",
-            "Downloading colorama-0.4.6-py2.py3-none-any.whl (25 kB)\n",
-            "Downloading portalocker-3.0.0-py3-none-any.whl (19 kB)\n",
-            "Downloading xxhash-3.5.0-cp310-cp310-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (194 kB)\n",
-            "\u001b[2K   \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m194.1/194.1 kB\u001b[0m \u001b[31m18.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n",
-            "\u001b[?25hBuilding wheels for collected packages: lm_eval, rouge-score, sqlitedict, word2number\n",
-            "  Building wheel for lm_eval (pyproject.toml) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for lm_eval: filename=lm_eval-0.4.5-py3-none-any.whl size=2451555 sha256=eb0bd7b20cd5649e4d6ae45d6f8857dc7d4201219e5f1fb7f05857d0ce40dafd\n",
-            "  Stored in directory: /tmp/pip-ephem-wheel-cache-j3ph5vqa/wheels/c0/f5/fd/b1db03e4fd2a4661835862d036a87a42bf868abe99903e333d\n",
-            "  Building wheel for rouge-score (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for rouge-score: filename=rouge_score-0.1.2-py3-none-any.whl size=24935 sha256=4de791eeae05926c2d1b4219891a96ab45504e46c483cd51a238bca08943b392\n",
-            "  Stored in directory: /root/.cache/pip/wheels/5f/dd/89/461065a73be61a532ff8599a28e9beef17985c9e9c31e541b4\n",
-            "  Building wheel for sqlitedict (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for sqlitedict: filename=sqlitedict-2.1.0-py3-none-any.whl size=16864 sha256=18e74dd174299b493b0bd17c4980b9f5a6c3d25b1534eb5ed9aee6508528b32c\n",
-            "  Stored in directory: /root/.cache/pip/wheels/79/d6/e7/304e0e6cb2221022c26d8161f7c23cd4f259a9e41e8bbcfabd\n",
-            "  Building wheel for word2number (setup.py) ... \u001b[?25l\u001b[?25hdone\n",
-            "  Created wheel for word2number: filename=word2number-1.1-py3-none-any.whl size=5568 sha256=3bff6053337559ad74336e3aa7d882744e338bd917ee99abc64e06dd30c22e8f\n",
-            "  Stored in directory: /root/.cache/pip/wheels/84/ff/26/d3cfbd971e96c5aa3737ecfced81628830d7359b55fbb8ca3b\n",
-            "Successfully built lm_eval rouge-score sqlitedict word2number\n",
-            "Installing collected packages: word2number, sqlitedict, zstandard, xxhash, tcolorpy, pybind11, portalocker, pathvalidate, mbstrdecoder, jsonlines, fsspec, dill, colorama, typepy, tqdm-multiprocess, sacrebleu, rouge-score, multiprocess, bitsandbytes, DataProperty, tabledata, datasets, pytablewriter, evaluate, lm_eval\n",
-            "  Attempting uninstall: fsspec\n",
-            "    Found existing installation: fsspec 2024.10.0\n",
-            "    Uninstalling fsspec-2024.10.0:\n",
-            "      Successfully uninstalled fsspec-2024.10.0\n",
-            "\u001b[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.\n",
-            "gcsfs 2024.10.0 requires fsspec==2024.10.0, but you have fsspec 2024.9.0 which is incompatible.\u001b[0m\u001b[31m\n",
-            "\u001b[0mSuccessfully installed DataProperty-1.0.1 bitsandbytes-0.44.1 colorama-0.4.6 datasets-3.1.0 dill-0.3.8 evaluate-0.4.3 fsspec-2024.9.0 jsonlines-4.0.0 lm_eval-0.4.5 mbstrdecoder-1.1.3 multiprocess-0.70.16 pathvalidate-3.2.1 portalocker-3.0.0 pybind11-2.13.6 pytablewriter-1.2.0 rouge-score-0.1.2 sacrebleu-2.4.3 sqlitedict-2.1.0 tabledata-1.3.3 tcolorpy-0.1.6 tqdm-multiprocess-0.0.11 typepy-1.3.2 word2number-1.1 xxhash-3.5.0 zstandard-0.23.0\n"
-          ]
-        }
-      ],
       "source": [
         "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
       ]
@@ -312,42 +132,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "uMoitxJkHerH",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 77,
-          "referenced_widgets": [
-            "7b825d3c10be4fa6abd7c115f8e4e1f2",
-            "d426e85f0ce641d381d52927df68c278",
-            "8f30879a15de4244adea101f6dc05c35",
-            "96507b4059764b0493bbb286f4cd1c3a",
-            "21cc893be48e460ab6e83e0f685249c2",
-            "047db71ca65846f1bacf16fbcd9c40f8",
-            "3813240ddf704b91ac556c05fe7499f7",
-            "46f47846e9cd4b9b9cf321247fe0e518",
-            "922f354c9c5b4c9ebae6ad0e1c87b760",
-            "c1bdaff966f84b949b22b650311b6528",
-            "f16a675994434ddb8b5983faf193b381"
-          ]
-        },
-        "outputId": "b833672c-b855-494d-f480-8417ef199be8"
       },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "samples_mmlu_all_2024-12-05T10-07-32.229420.jsonl:   0%|          | 0.00/89.9M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "7b825d3c10be4fa6abd7c115f8e4e1f2"
-            }
-          },
-          "metadata": {}
-        }
-      ],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
@@ -372,42 +159,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "eIUOqu5sHfkM",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 77,
-          "referenced_widgets": [
-            "79e90f6090ee493890acbf26292dd997",
-            "d245bcee114a4216bbfc6a1999daca08",
-            "e2acb9e61cec4083b3113415a205df12",
-            "08a63fea880840a2a472008d98bb03d2",
-            "004d4e339ec840c8bb748f4fdcef663a",
-            "3f9668c6708247f4b65ae9204b5df832",
-            "b1189ac61af44228a2e52026e73b0d2e",
-            "8c89d406ae534cf0a7ef99eba4263b2b",
-            "8adaf7159dcf4b318c2fc5930dd8633d",
-            "24bd51fe6e81460f9a9c99a1961a4107",
-            "2b0ffc94bfca468a89f0799cbef29ee5"
-          ]
-        },
-        "outputId": "50a091b8-93f4-48dd-93ec-6839b33fbd7e"
       },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "samples_mmlu_all_2024-12-05T10-28-27.223863.jsonl:   0%|          | 0.00/89.9M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "79e90f6090ee493890acbf26292dd997"
-            }
-          },
-          "metadata": {}
-        }
-      ],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
@@ -432,42 +186,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "xsL82Q4SHgMn",
-        "colab": {
-          "base_uri": "https://localhost:8080/",
-          "height": 77,
-          "referenced_widgets": [
-            "c2b881fabd4f4758bd9aca516a579b4c",
-            "6ab81736ee0643c7ae5e20978914fef4",
-            "84ee9c147755422fb74229a2817ad35a",
-            "35025439e8e545d28ee9f3c51df4f1bb",
-            "da989b5fe95449f6928355cd317d7f4a",
-            "ed8214d39c2e4e47a7b32e1152597642",
-            "6210d27773144f78aed58c4ad9aaa0a2",
-            "0e65b64cc85c4b52aa48c11f65ccceeb",
-            "85504ad2f7be45e1a828b5077905dad5",
-            "a638974916d74c239e287002bc1d089a",
-            "a6fcb58400a54716bda1e4dc511add95"
-          ]
-        },
-        "outputId": "29244830-d996-40bf-9f1f-77536ffe10f7"
       },
-      "outputs": [
-        {
-          "output_type": "display_data",
-          "data": {
-            "text/plain": [
-              "samples_mmlu_all_2024-12-05T11-08-04.943164.jsonl:   0%|          | 0.00/89.9M [00:00<?, ?B/s]"
-            ],
-            "application/vnd.jupyter.widget-view+json": {
-              "version_major": 2,
-              "version_minor": 0,
-              "model_id": "c2b881fabd4f4758bd9aca516a579b4c"
-            }
-          },
-          "metadata": {}
-        }
-      ],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
@@ -486,21 +207,9 @@
       "execution_count": null,
       "metadata": {
         "collapsed": true,
-        "id": "ilu9_ulWTy3p",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "b87c2a88-022e-40c5-861b-90c51b8affbe"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stdout",
-          "text": [
-            "^C\n"
-          ]
-        }
-      ],
       "source": [
         "!lm_eval \\\n",
         "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
@@ -514,22 +223,9 @@
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
-        "id": "jE5r8gVDHhAz",
-        "colab": {
-          "base_uri": "https://localhost:8080/"
-        },
-        "outputId": "23da6a08-c279-4d2c-96b1-d2d849c77d3d"
       },
-      "outputs": [
-        {
-          "output_type": "stream",
-          "name": "stderr",
-          "text": [
-            "No files have been modified since last commit. Skipping to prevent empty commit.\n",
-            "WARNING:huggingface_hub.hf_api:No files have been modified since last commit. Skipping to prevent empty commit.\n"
-          ]
-        }
-      ],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
@@ -548,1036 +244,6 @@
     },
     "language_info": {
       "name": "python"
-    },
-    "widgets": {
-      "application/vnd.jupyter.widget-state+json": {
-        "7b825d3c10be4fa6abd7c115f8e4e1f2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_d426e85f0ce641d381d52927df68c278",
-              "IPY_MODEL_8f30879a15de4244adea101f6dc05c35",
-              "IPY_MODEL_96507b4059764b0493bbb286f4cd1c3a"
-            ],
-            "layout": "IPY_MODEL_21cc893be48e460ab6e83e0f685249c2"
-          }
-        },
-        "d426e85f0ce641d381d52927df68c278": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_047db71ca65846f1bacf16fbcd9c40f8",
-            "placeholder": "",
-            "style": "IPY_MODEL_3813240ddf704b91ac556c05fe7499f7",
-            "value": "samples_mmlu_all_2024-12-05T10-07-32.229420.jsonl: 100%"
-          }
-        },
-        "8f30879a15de4244adea101f6dc05c35": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_46f47846e9cd4b9b9cf321247fe0e518",
-            "max": 89903482,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_922f354c9c5b4c9ebae6ad0e1c87b760",
-            "value": 89903482
-          }
-        },
-        "96507b4059764b0493bbb286f4cd1c3a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_c1bdaff966f84b949b22b650311b6528",
-            "placeholder": "",
-            "style": "IPY_MODEL_f16a675994434ddb8b5983faf193b381",
-            "value": " 89.9M/89.9M [00:03&lt;00:00, 25.2MB/s]"
-          }
-        },
-        "21cc893be48e460ab6e83e0f685249c2": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "047db71ca65846f1bacf16fbcd9c40f8": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3813240ddf704b91ac556c05fe7499f7": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "46f47846e9cd4b9b9cf321247fe0e518": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "922f354c9c5b4c9ebae6ad0e1c87b760": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "c1bdaff966f84b949b22b650311b6528": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "f16a675994434ddb8b5983faf193b381": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "79e90f6090ee493890acbf26292dd997": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_d245bcee114a4216bbfc6a1999daca08",
-              "IPY_MODEL_e2acb9e61cec4083b3113415a205df12",
-              "IPY_MODEL_08a63fea880840a2a472008d98bb03d2"
-            ],
-            "layout": "IPY_MODEL_004d4e339ec840c8bb748f4fdcef663a"
-          }
-        },
-        "d245bcee114a4216bbfc6a1999daca08": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_3f9668c6708247f4b65ae9204b5df832",
-            "placeholder": "",
-            "style": "IPY_MODEL_b1189ac61af44228a2e52026e73b0d2e",
-            "value": "samples_mmlu_all_2024-12-05T10-28-27.223863.jsonl: 100%"
-          }
-        },
-        "e2acb9e61cec4083b3113415a205df12": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_8c89d406ae534cf0a7ef99eba4263b2b",
-            "max": 89904912,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_8adaf7159dcf4b318c2fc5930dd8633d",
-            "value": 89904912
-          }
-        },
-        "08a63fea880840a2a472008d98bb03d2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_24bd51fe6e81460f9a9c99a1961a4107",
-            "placeholder": "",
-            "style": "IPY_MODEL_2b0ffc94bfca468a89f0799cbef29ee5",
-            "value": " 89.9M/89.9M [00:04&lt;00:00, 17.7MB/s]"
-          }
-        },
-        "004d4e339ec840c8bb748f4fdcef663a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "3f9668c6708247f4b65ae9204b5df832": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "b1189ac61af44228a2e52026e73b0d2e": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "8c89d406ae534cf0a7ef99eba4263b2b": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "8adaf7159dcf4b318c2fc5930dd8633d": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "24bd51fe6e81460f9a9c99a1961a4107": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "2b0ffc94bfca468a89f0799cbef29ee5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "c2b881fabd4f4758bd9aca516a579b4c": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HBoxModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HBoxModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HBoxView",
-            "box_style": "",
-            "children": [
-              "IPY_MODEL_6ab81736ee0643c7ae5e20978914fef4",
-              "IPY_MODEL_84ee9c147755422fb74229a2817ad35a",
-              "IPY_MODEL_35025439e8e545d28ee9f3c51df4f1bb"
-            ],
-            "layout": "IPY_MODEL_da989b5fe95449f6928355cd317d7f4a"
-          }
-        },
-        "6ab81736ee0643c7ae5e20978914fef4": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_ed8214d39c2e4e47a7b32e1152597642",
-            "placeholder": "",
-            "style": "IPY_MODEL_6210d27773144f78aed58c4ad9aaa0a2",
-            "value": "samples_mmlu_all_2024-12-05T11-08-04.943164.jsonl: 100%"
-          }
-        },
-        "84ee9c147755422fb74229a2817ad35a": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "FloatProgressModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "FloatProgressModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "ProgressView",
-            "bar_style": "success",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_0e65b64cc85c4b52aa48c11f65ccceeb",
-            "max": 89914374,
-            "min": 0,
-            "orientation": "horizontal",
-            "style": "IPY_MODEL_85504ad2f7be45e1a828b5077905dad5",
-            "value": 89914374
-          }
-        },
-        "35025439e8e545d28ee9f3c51df4f1bb": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "HTMLModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_dom_classes": [],
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "HTMLModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/controls",
-            "_view_module_version": "1.5.0",
-            "_view_name": "HTMLView",
-            "description": "",
-            "description_tooltip": null,
-            "layout": "IPY_MODEL_a638974916d74c239e287002bc1d089a",
-            "placeholder": "",
-            "style": "IPY_MODEL_a6fcb58400a54716bda1e4dc511add95",
-            "value": " 89.9M/89.9M [00:10&lt;00:00, 3.64MB/s]"
-          }
-        },
-        "da989b5fe95449f6928355cd317d7f4a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "ed8214d39c2e4e47a7b32e1152597642": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "6210d27773144f78aed58c4ad9aaa0a2": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        },
-        "0e65b64cc85c4b52aa48c11f65ccceeb": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "85504ad2f7be45e1a828b5077905dad5": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "ProgressStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "ProgressStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "bar_color": null,
-            "description_width": ""
-          }
-        },
-        "a638974916d74c239e287002bc1d089a": {
-          "model_module": "@jupyter-widgets/base",
-          "model_name": "LayoutModel",
-          "model_module_version": "1.2.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/base",
-            "_model_module_version": "1.2.0",
-            "_model_name": "LayoutModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "LayoutView",
-            "align_content": null,
-            "align_items": null,
-            "align_self": null,
-            "border": null,
-            "bottom": null,
-            "display": null,
-            "flex": null,
-            "flex_flow": null,
-            "grid_area": null,
-            "grid_auto_columns": null,
-            "grid_auto_flow": null,
-            "grid_auto_rows": null,
-            "grid_column": null,
-            "grid_gap": null,
-            "grid_row": null,
-            "grid_template_areas": null,
-            "grid_template_columns": null,
-            "grid_template_rows": null,
-            "height": null,
-            "justify_content": null,
-            "justify_items": null,
-            "left": null,
-            "margin": null,
-            "max_height": null,
-            "max_width": null,
-            "min_height": null,
-            "min_width": null,
-            "object_fit": null,
-            "object_position": null,
-            "order": null,
-            "overflow": null,
-            "overflow_x": null,
-            "overflow_y": null,
-            "padding": null,
-            "right": null,
-            "top": null,
-            "visibility": null,
-            "width": null
-          }
-        },
-        "a6fcb58400a54716bda1e4dc511add95": {
-          "model_module": "@jupyter-widgets/controls",
-          "model_name": "DescriptionStyleModel",
-          "model_module_version": "1.5.0",
-          "state": {
-            "_model_module": "@jupyter-widgets/controls",
-            "_model_module_version": "1.5.0",
-            "_model_name": "DescriptionStyleModel",
-            "_view_count": null,
-            "_view_module": "@jupyter-widgets/base",
-            "_view_module_version": "1.2.0",
-            "_view_name": "StyleView",
-            "description_width": ""
-          }
-        }
-      }
     }
   },
   "nbformat": 4,

       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "kGW7vfRkrqHe"
       },
+      "outputs": [],
       "source": [
         "!pip install -r https://huggingface.co/flunardelli/llm-metaeval/raw/main/requirements.txt"
       ]
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "uMoitxJkHerH"
       },
+      "outputs": [],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "eIUOqu5sHfkM"
       },
+      "outputs": [],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "xsL82Q4SHgMn"
       },
+      "outputs": [],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
       "execution_count": null,
       "metadata": {
         "collapsed": true,
+        "id": "ilu9_ulWTy3p"
       },
+      "outputs": [],
       "source": [
         "!lm_eval \\\n",
         "--model hf --model_args pretrained=mistralai/Mixtral-8x7B-Instruct-v0.1,revision=41bd4c9e7e4fb318ca40e721131d4933966c2cc1,trust_remote_code=False,dtype=bfloat16,parallelize=True \\\n",
       "cell_type": "code",
       "execution_count": null,
       "metadata": {
+        "id": "jE5r8gVDHhAz"
       },
+      "outputs": [],
       "source": [
         "hf_upload_folder(BASE_FOLDER)"
       ]
     },
     "language_info": {
       "name": "python"
     }
   },
   "nbformat": 4,

llm_metaeval_eval_harness_results.ipynb CHANGED Viewed

The diff for this file is too large to render. See raw diff