File size: 4,810 Bytes
eeb0016 dbbe441 eeb0016 dbbe441 eeb0016 a853257 eeb0016 1dd1b3f eeb0016 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "code",
"source": [
"# @title # ⚡ Imat-AutoGGUF\n",
"\n",
"# @markdown Made by https://huggingface.co/Virt-io\n",
"\n",
"# @markdown Edited https://github.com/mlabonne/llm-course LazyMergekit to work with Imatrix\n",
"\n",
"# @markdown\n",
"\n",
"# @markdown The `token` corresponds to the name of the secret that stores your [Hugging Face access token](https://huggingface.co/settings/tokens) in Colab.\n",
"\n",
"# @markdown ---\n",
"\n",
"# @markdown ### ⚡ Quantization parameters\n",
"MODEL_ID = \"TinyLlama/TinyLlama-1.1B-Chat-v1.0\" # @param {type:\"string\"}\n",
"IMATRIX_OPTION = 'Imatrix' # @param [\"Imatrix\", \"Imatrix-RP\", \"Imatrix-RP-Extended\"]\n",
"if IMATRIX_OPTION == \"Imatrix\":\n",
" IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix.txt\"\n",
"if IMATRIX_OPTION == \"Imatrix-RP\":\n",
" IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-with-rp-data.txt\"\n",
"if IMATRIX_OPTION == \"Imatrix-RP-Extended\":\n",
" IMATRIX = f\"Google-Colab-Imatrix-GGUF/Imatrix/imatrix-rp-extended.txt\"\n",
"print(IMATRIX)\n",
"QUANTIZATION_METHODS = \"IQ4_NL, Q8_0\" # @param {type:\"string\"}\n",
"QUANTIZATION_METHODS = QUANTIZATION_METHODS.replace(\" \", \"\").split(\",\")\n",
"\n",
"# @markdown ---\n",
"\n",
"# @markdown ### 🤗 Hugging Face Hub\n",
"username = \"Virt-io\" # @param {type:\"string\"}\n",
"token = \"HF_TOKEN\" # @param {type:\"string\"}\n",
"\n",
"MODEL_NAME = MODEL_ID.split('/')[-1]\n",
"\n",
"# Git clone llamacpp\n",
"!git clone https://github.com/ggerganov/llama.cpp\n",
"!cd llama.cpp && git pull\n",
"\n",
"# Download model\n",
"!git lfs install\n",
"!git clone https://huggingface.co/{MODEL_ID}\n",
"\n",
"# Download Imatrix\n",
"!git clone https://huggingface.co/Virt-io/Google-Colab-Imatrix-GGUF\n",
"\n",
"# Install python dependencies and reload instance\n",
"!pip install -r llama.cpp/requirements/requirements-convert.txt\n",
"\n",
"# Build llamacpp\n",
"!cd llama.cpp && make clean && LLAMA_CUDA=1 LLAMA_LTO=1 LLAMA_CUDA_DMMV_X=64 LLAMA_CUDA_MMV_Y=4 LLAMA_CUDA_KQUANTS_ITER=2 LLAMA_CUDA_F16=1 LLAMA_CUDA_DMMV_F16=1 make -j16\n",
"\n",
"# Convert to fp16\n",
"fp16 = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.gguf\"\n",
"!python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}\n",
"\n",
"# Run imatrix\n",
"imat_dat = f\"{fp16}.{IMATRIX_OPTION}.dat\"\n",
"!./llama.cpp/imatrix -ngl 100 -c 512 -b 512 --model {fp16} -f {IMATRIX} -o {imat_dat}\n",
"\n",
"# Quantize the model for each method in the QUANTIZATION_METHODS list\n",
"for method in QUANTIZATION_METHODS:\n",
" qtype = f\"{MODEL_NAME}/{MODEL_NAME.lower()}.{method.upper()}.gguf\"\n",
" !./llama.cpp/quantize --imatrix {imat_dat} {fp16} {qtype} {method}"
],
"metadata": {
"id": "fD24jJxq7t3k"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# @markdown Upload to HF\n",
"!pip install -q huggingface_hub\n",
"from huggingface_hub import create_repo, HfApi\n",
"from google.colab import userdata, runtime\n",
"\n",
"# Defined in the secrets tab in Google Colab\n",
"hf_token = userdata.get(token)\n",
"api = HfApi()\n",
"\n",
"# Create empty repo\n",
"create_repo(\n",
" repo_id = f\"{username}/{MODEL_NAME}-GGUF\",\n",
" repo_type=\"model\",\n",
" exist_ok=True,\n",
" token=hf_token\n",
")\n",
"\n",
"# Upload gguf files\n",
"api.upload_folder(\n",
" folder_path=MODEL_NAME,\n",
" repo_id=f\"{username}/{MODEL_NAME}-GGUF\",\n",
" allow_patterns=[\"*.gguf\", \"*.fp16.gguf\", \"*.dat\", \"*.md\"],\n",
" token=hf_token\n",
")\n",
"\n",
"# Kill runtime\n",
"runtime.unassign()"
],
"metadata": {
"id": "F7Q8_Y1_e3BX"
},
"execution_count": null,
"outputs": []
}
]
} |