Add new data option
Imat_AutoGGUF.ipynb  +4 -2
@@ -33,11 +33,13 @@
     "\n",
     "# @markdown ### ⚡ Quantization parameters\n",
     "MODEL_ID = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\" # @param {type:\"string\"}\n",
-    "IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\"]\n",
+    "IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\", \"Imatrix-ChatML-test\"]\n",
     "if IMATRIX_OPTION == \"Imatrix\":\n",
     " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt\"\n",
     "if IMATRIX_OPTION == \"Imatrix-RP\":\n",
     " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt\"\n",
+    "if IMATRIX_OPTION == \"Imatrix-ChatML-test\":\n",
+    " IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/chatml_test.txt\"\n",
     "print(IMATRIX)\n",
     "QUANTIZATION_METHODS = \"IQ4_NL, Q8_0\" # @param {type:\"string\"}\n",
     "QUANTIZATION_METHODS = QUANTIZATION_METHODS.replace(\" \", \"\").split(\",\")\n",
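The chained ifs above map each @param choice to its calibration-data file, and the two QUANTIZATION_METHODS lines then split the comma-separated @param string into a list, so "IQ4_NL, Q8_0" becomes ["IQ4_NL", "Q8_0"]. For illustration only (this is not the notebook's code), the same option-to-file mapping written as a dict lookup:

    # Illustrative equivalent of the chained ifs in the hunk above.
    IMATRIX_FILES = {
        "Imatrix": "Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt",
        "Imatrix-RP": "Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt",
        "Imatrix-ChatML-test": "Google-Colab-Imatrix-GGUF/Imatrix/chatml_test.txt",
    }
    IMATRIX = IMATRIX_FILES[IMATRIX_OPTION]  # KeyError on an unknown option
    print(IMATRIX)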
@@ -65,7 +67,7 @@
     "!pip install -r llama.cpp/requirements/requirements-convert.txt\n",
     "\n",
     "# Build llamacpp\n",
-    "!cd llama.cpp && make clean &&
+    "!cd llama.cpp && make clean && LLAMA_CUDA=1 LLAMA_LTO=1 LLAMA_CUDA_DMMV_X=64 LLAMA_CUDA_MMV_Y=4 LLAMA_CUDA_KQUANTS_ITER=2 LLAMA_CUDA_F16=1 LLAMA_CUDA_DMMV_F16=1 make -j16\n",
     "\n",
     "# Convert to fp16\n",
     "fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.gguf\"\n",
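The rewritten build line turns on llama.cpp's CUDA backend (LLAMA_CUDA=1) and link-time optimization (LLAMA_LTO=1), sets several CUDA kernel-tuning variables, and builds with 16 parallel jobs. As a minimal sketch, the same build driven from Python instead of a "!" line; the cwd and subprocess plumbing here are assumptions, not code from this diff:

    # Sketch: run the llama.cpp build with the Makefile options from the
    # hunk above passed as environment variables. Assumes llama.cpp/ is a
    # checkout in the current working directory.
    import os
    import subprocess

    build_env = dict(
        os.environ,
        LLAMA_CUDA="1", LLAMA_LTO="1",
        LLAMA_CUDA_DMMV_X="64", LLAMA_CUDA_MMV_Y="4",
        LLAMA_CUDA_KQUANTS_ITER="2",
        LLAMA_CUDA_F16="1", LLAMA_CUDA_DMMV_F16="1",
    )
    subprocess.run(["make", "clean"], cwd="llama.cpp", check=True)
    subprocess.run(["make", "-j16"], cwd="llama.cpp", env=build_env, check=True)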