matrixportal committed (verified)
Commit a347f05 · 1 Parent(s): c4a3660

Upload 11 files

Files changed (12)
  1. .gitattributes +1 -0
  2. Dockerfile +65 -0
  3. README.md +14 -8
  4. app.py +434 -0
  5. docker-compose.yml +16 -0
  6. dockerignore +3 -0
  7. error.png +0 -0
  8. gitattributes +37 -0
  9. gitignore +167 -0
  10. groups_merged.txt +0 -0
  11. llama.png +3 -0
  12. start.sh +21 -0
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ llama.png filter=lfs diff=lfs merge=lfs -text
Dockerfile ADDED
@@ -0,0 +1,65 @@
+ FROM nvidia/cuda:11.8.0-cudnn8-devel-ubuntu22.04
+
+ ENV DEBIAN_FRONTEND=noninteractive
+ RUN apt-get update && \
+     apt-get upgrade -y && \
+     apt-get install -y --no-install-recommends \
+     git \
+     git-lfs \
+     wget \
+     curl \
+     cmake \
+     # python build dependencies \
+     build-essential \
+     libssl-dev \
+     zlib1g-dev \
+     libbz2-dev \
+     libreadline-dev \
+     libsqlite3-dev \
+     libncursesw5-dev \
+     xz-utils \
+     tk-dev \
+     libxml2-dev \
+     libxmlsec1-dev \
+     libffi-dev \
+     liblzma-dev \
+     ffmpeg \
+     nvidia-driver-515
+
+ RUN useradd -m -u 1000 user
+ USER user
+ ENV HOME=/home/user \
+     PATH=/home/user/.local/bin:${PATH}
+ WORKDIR ${HOME}/app
+
+ RUN curl https://pyenv.run | bash
+ ENV PATH=${HOME}/.pyenv/shims:${HOME}/.pyenv/bin:${PATH}
+ ARG PYTHON_VERSION=3.10.13
+ RUN pyenv install ${PYTHON_VERSION} && \
+     pyenv global ${PYTHON_VERSION} && \
+     pyenv rehash && \
+     pip install --no-cache-dir -U pip setuptools wheel && \
+     pip install "huggingface-hub" "hf-transfer" "gradio[oauth]>=4.28.0" "gradio_huggingfacehub_search==0.0.7" "APScheduler"
+
+ COPY --chown=1000 . ${HOME}/app
+ RUN git clone https://github.com/ggerganov/llama.cpp
+ RUN pip install -r llama.cpp/requirements.txt
+
+ COPY groups_merged.txt ${HOME}/app/llama.cpp/
+
+ ENV PYTHONPATH=${HOME}/app \
+     PYTHONUNBUFFERED=1 \
+     HF_HUB_ENABLE_HF_TRANSFER=1 \
+     GRADIO_ALLOW_FLAGGING=never \
+     GRADIO_NUM_PORTS=1 \
+     GRADIO_SERVER_NAME=0.0.0.0 \
+     GRADIO_THEME=huggingface \
+     TQDM_POSITION=-1 \
+     TQDM_MININTERVAL=1 \
+     SYSTEM=spaces \
+     LD_LIBRARY_PATH=/usr/local/cuda/lib64:${LD_LIBRARY_PATH} \
+     PATH=/usr/local/nvidia/bin:${PATH}
+
+ ENTRYPOINT /bin/bash start.sh
README.md CHANGED
@@ -1,13 +1,19 @@
  ---
- title: Deneme
- emoji: 👀
- colorFrom: red
- colorTo: red
- sdk: gradio
- sdk_version: 5.12.0
- app_file: app.py
+ title: GGUF My Repo
+ emoji: 🦙
+ colorFrom: gray
+ colorTo: pink
+ sdk: docker
+ hf_oauth: true
+ hf_oauth_scopes:
+ - read-repos
+ - write-repos
+ - manage-repos
  pinned: false
- short_description: deneme space
  ---

  Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
+
+ To run this space locally:
+ 1. Log in with the Hugging Face CLI: `huggingface-cli login`
+ 2. Run: `HF_TOKEN=$(cat ~/.cache/huggingface/token) docker compose up`
app.py ADDED
@@ -0,0 +1,434 @@
+ import os
+ import subprocess
+ import signal
+ os.environ["GRADIO_ANALYTICS_ENABLED"] = "False"
+ import gradio as gr
+ import tempfile
+
+ from huggingface_hub import HfApi, ModelCard, whoami
+ from gradio_huggingfacehub_search import HuggingfaceHubSearch
+ from pathlib import Path
+ from textwrap import dedent
+ from apscheduler.schedulers.background import BackgroundScheduler
+
+
+ # used for restarting the space
+ HF_TOKEN = os.environ.get("HF_TOKEN")
+ CONVERSION_SCRIPT = "./llama.cpp/convert_hf_to_gguf.py"
+
+ # escape HTML for logging
+ def escape(s: str) -> str:
+     s = s.replace("&", "&amp;")  # Must be done first!
+     s = s.replace("<", "&lt;")
+     s = s.replace(">", "&gt;")
+     s = s.replace('"', "&quot;")
+     s = s.replace("\n", "<br/>")
+     return s
+
+ def generate_importance_matrix(model_path: str, train_data_path: str, output_path: str):
+     imatrix_command = [
+         "./llama.cpp/llama-imatrix",
+         "-m", model_path,
+         "-f", train_data_path,
+         "-ngl", "99",
+         "--output-frequency", "10",
+         "-o", output_path,
+     ]
+
+     if not os.path.isfile(model_path):
+         raise Exception(f"Model file not found: {model_path}")
+
+     print("Running imatrix command...")
+     process = subprocess.Popen(imatrix_command, shell=False)
+
+     try:
+         process.wait(timeout=60)  # added wait
+     except subprocess.TimeoutExpired:
+         print("Imatrix computation timed out. Sending SIGINT to allow graceful termination...")
+         process.send_signal(signal.SIGINT)
+         try:
+             process.wait(timeout=5)  # grace period
+         except subprocess.TimeoutExpired:
+             print("Imatrix process still didn't terminate. Forcefully terminating process...")
+             process.kill()
+
+     print("Importance matrix generation completed.")
+
+ def split_upload_model(model_path: str, outdir: str, repo_id: str, oauth_token: gr.OAuthToken | None, split_max_tensors=256, split_max_size=None):
+     print(f"Model path: {model_path}")
+     print(f"Output dir: {outdir}")
+
+     if oauth_token.token is None:
+         raise ValueError("You have to be logged in.")
+
+     split_cmd = [
+         "./llama.cpp/llama-gguf-split",
+         "--split",
+     ]
+     if split_max_size:
+         split_cmd.append("--split-max-size")
+         split_cmd.append(split_max_size)
+     else:
+         split_cmd.append("--split-max-tensors")
+         split_cmd.append(str(split_max_tensors))
+
+     # args for output
+     model_path_prefix = '.'.join(model_path.split('.')[:-1])  # remove the file extension
+     split_cmd.append(model_path)
+     split_cmd.append(model_path_prefix)
+
+     print(f"Split command: {split_cmd}")
+
+     result = subprocess.run(split_cmd, shell=False, capture_output=True, text=True)
+     print(f"Split command stdout: {result.stdout}")
+     print(f"Split command stderr: {result.stderr}")
+
+     if result.returncode != 0:
+         stderr_str = result.stderr.decode("utf-8")
+         raise Exception(f"Error splitting the model: {stderr_str}")
+     print("Model split successfully!")
+
+     # remove the original model file if needed
+     if os.path.exists(model_path):
+         os.remove(model_path)
+
+     model_file_prefix = model_path_prefix.split('/')[-1]
+     print(f"Model file name prefix: {model_file_prefix}")
+     sharded_model_files = [f for f in os.listdir(outdir) if f.startswith(model_file_prefix) and f.endswith(".gguf")]
+     if sharded_model_files:
+         print(f"Sharded model files: {sharded_model_files}")
+         api = HfApi(token=oauth_token.token)
+         for file in sharded_model_files:
+             file_path = os.path.join(outdir, file)
+             print(f"Uploading file: {file_path}")
+             try:
+                 api.upload_file(
+                     path_or_fileobj=file_path,
+                     path_in_repo=file,
+                     repo_id=repo_id,
+                 )
+             except Exception as e:
+                 raise Exception(f"Error uploading file {file_path}: {e}")
+     else:
+         raise Exception("No sharded files found.")
+
+     print("Sharded model has been uploaded successfully!")
+
+ def process_model(model_id, q_method, use_imatrix, imatrix_q_method, private_repo, train_data_file, split_model, split_max_tensors, split_max_size, oauth_token: gr.OAuthToken | None):
+     if oauth_token is None or oauth_token.token is None:
+         raise ValueError("You must be logged in to use GGUF-my-repo")
+     model_name = model_id.split('/')[-1]
+
+     try:
+         api = HfApi(token=oauth_token.token)
+
+         dl_pattern = ["*.md", "*.json", "*.model"]
+
+         pattern = (
+             "*.safetensors"
+             if any(
+                 file.path.endswith(".safetensors")
+                 for file in api.list_repo_tree(
+                     repo_id=model_id,
+                     recursive=True,
+                 )
+             )
+             else "*.bin"
+         )
+
+         dl_pattern += [pattern]
+
+         if not os.path.exists("downloads"):
+             os.makedirs("downloads")
+
+         if not os.path.exists("outputs"):
+             os.makedirs("outputs")
+
+         with tempfile.TemporaryDirectory(dir="outputs") as outdir:
+             fp16 = str(Path(outdir)/f"{model_name}.fp16.gguf")
+
+             with tempfile.TemporaryDirectory(dir="downloads") as tmpdir:
+                 # Keep the model name as the dirname so the model name metadata is populated correctly
+                 local_dir = Path(tmpdir)/model_name
+                 print(local_dir)
+                 api.snapshot_download(repo_id=model_id, local_dir=local_dir, local_dir_use_symlinks=False, allow_patterns=dl_pattern)
+                 print("Model downloaded successfully!")
+                 print(f"Current working directory: {os.getcwd()}")
+                 print(f"Model directory contents: {os.listdir(local_dir)}")
+
+                 config_dir = local_dir/"config.json"
+                 adapter_config_dir = local_dir/"adapter_config.json"
+                 if os.path.exists(adapter_config_dir) and not os.path.exists(config_dir):
+                     raise Exception('adapter_config.json is present.<br/><br/>If you are converting a LoRA adapter to GGUF, please use <a href="https://huggingface.co/spaces/ggml-org/gguf-my-lora" target="_blank" style="text-decoration:underline">GGUF-my-lora</a>.')
+
+                 result = subprocess.run([
+                     "python", CONVERSION_SCRIPT, local_dir, "--outtype", "f16", "--outfile", fp16
+                 ], shell=False, capture_output=True)
+                 print(result)
+                 if result.returncode != 0:
+                     stderr_str = result.stderr.decode("utf-8")
+                     raise Exception(f"Error converting to fp16: {stderr_str}")
+                 print("Model converted to fp16 successfully!")
+                 print(f"Converted model path: {fp16}")
+
+             imatrix_path = Path(outdir)/"imatrix.dat"
+
+             if use_imatrix:
+                 if train_data_file:
+                     train_data_path = train_data_file.name
+                 else:
+                     train_data_path = "llama.cpp/groups_merged.txt"  # fallback calibration dataset
+
+                 print(f"Training data file path: {train_data_path}")
+
+                 if not os.path.isfile(train_data_path):
+                     raise Exception(f"Training data file not found: {train_data_path}")
+
+                 generate_importance_matrix(fp16, train_data_path, imatrix_path)
+             else:
+                 print("Not using imatrix quantization.")
+
+             # Quantize the model
+             quantized_gguf_name = f"{model_name.lower()}-{imatrix_q_method.lower()}-imat.gguf" if use_imatrix else f"{model_name.lower()}-{q_method.lower()}.gguf"
+             quantized_gguf_path = str(Path(outdir)/quantized_gguf_name)
+             if use_imatrix:
+                 quantise_ggml = [
+                     "./llama.cpp/llama-quantize",
+                     "--imatrix", imatrix_path, fp16, quantized_gguf_path, imatrix_q_method
+                 ]
+             else:
+                 quantise_ggml = [
+                     "./llama.cpp/llama-quantize",
+                     fp16, quantized_gguf_path, q_method
+                 ]
+             result = subprocess.run(quantise_ggml, shell=False, capture_output=True)
+             if result.returncode != 0:
+                 stderr_str = result.stderr.decode("utf-8")
+                 raise Exception(f"Error quantizing: {stderr_str}")
+             print(f"Quantized successfully with {imatrix_q_method if use_imatrix else q_method} option!")
+             print(f"Quantized model path: {quantized_gguf_path}")
+
+             # Create empty repo
+             username = whoami(oauth_token.token)["name"]
+             new_repo_url = api.create_repo(repo_id=f"{username}/{model_name}-GGUF", exist_ok=True, private=private_repo)
+             new_repo_id = new_repo_url.repo_id
+             print("Repo created successfully!", new_repo_url)
+
+             try:
+                 card = ModelCard.load(model_id, token=oauth_token.token)
+             except:
+                 card = ModelCard("")
+             if card.data.tags is None:
+                 card.data.tags = []
+             card.data.tags.append("llama-cpp")
+             card.data.tags.append("gguf-my-repo")
+             card.data.base_model = model_id
+             card.text = dedent(
+                 f"""
+                 # {new_repo_id}
+                 This model was converted to GGUF format from [`{model_id}`](https://huggingface.co/{model_id}) using llama.cpp via the ggml.ai's [GGUF-my-repo](https://huggingface.co/spaces/ggml-org/gguf-my-repo) space.
+                 Refer to the [original model card](https://huggingface.co/{model_id}) for more details on the model.
+
+                 ## Use with llama.cpp
+                 Install llama.cpp through brew (works on Mac and Linux)
+
+                 ```bash
+                 brew install llama.cpp
+
+                 ```
+                 Invoke the llama.cpp server or the CLI.
+
+                 ### CLI:
+                 ```bash
+                 llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+                 ```
+
+                 ### Server:
+                 ```bash
+                 llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+                 ```
+
+                 Note: You can also use this checkpoint directly through the [usage steps](https://github.com/ggerganov/llama.cpp?tab=readme-ov-file#usage) listed in the Llama.cpp repo as well.
+
+                 Step 1: Clone llama.cpp from GitHub.
+                 ```
+                 git clone https://github.com/ggerganov/llama.cpp
+                 ```
+
+                 Step 2: Move into the llama.cpp folder and build it with `LLAMA_CURL=1` flag along with other hardware-specific flags (for ex: LLAMA_CUDA=1 for Nvidia GPUs on Linux).
+                 ```
+                 cd llama.cpp && LLAMA_CURL=1 make
+                 ```
+
+                 Step 3: Run inference through the main binary.
+                 ```
+                 ./llama-cli --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -p "The meaning to life and the universe is"
+                 ```
+                 or
+                 ```
+                 ./llama-server --hf-repo {new_repo_id} --hf-file {quantized_gguf_name} -c 2048
+                 ```
+                 """
+             )
+             readme_path = Path(outdir)/"README.md"
+             card.save(readme_path)
+
+             if split_model:
+                 split_upload_model(str(quantized_gguf_path), outdir, new_repo_id, oauth_token, split_max_tensors, split_max_size)
+             else:
+                 try:
+                     print(f"Uploading quantized model: {quantized_gguf_path}")
+                     api.upload_file(
+                         path_or_fileobj=quantized_gguf_path,
+                         path_in_repo=quantized_gguf_name,
+                         repo_id=new_repo_id,
+                     )
+                 except Exception as e:
+                     raise Exception(f"Error uploading quantized model: {e}")
+
+             if os.path.isfile(imatrix_path):
+                 try:
+                     print(f"Uploading imatrix.dat: {imatrix_path}")
+                     api.upload_file(
+                         path_or_fileobj=imatrix_path,
+                         path_in_repo="imatrix.dat",
+                         repo_id=new_repo_id,
+                     )
+                 except Exception as e:
+                     raise Exception(f"Error uploading imatrix.dat: {e}")
+
+             api.upload_file(
+                 path_or_fileobj=readme_path,
+                 path_in_repo="README.md",
+                 repo_id=new_repo_id,
+             )
+             print(f"Uploaded successfully with {imatrix_q_method if use_imatrix else q_method} option!")
+
+         # end of the TemporaryDirectory(dir="outputs") block; temporary outputs are deleted here
+
+         return (
+             f'<h1>✅ DONE</h1><br/>Find your repo here: <a href="{new_repo_url}" target="_blank" style="text-decoration:underline">{new_repo_id}</a>',
+             "llama.png",
+         )
+     except Exception as e:
+         return (f'<h1>❌ ERROR</h1><br/><pre style="white-space:pre-wrap;">{escape(str(e))}</pre>', "error.png")
+
+
+ css="""/* Custom CSS to allow scrolling */
+ .gradio-container {overflow-y: auto;}
+ """
+ # Create Gradio interface
+ with gr.Blocks(css=css) as demo:
+     gr.Markdown("You must be logged in to use GGUF-my-repo.")
+     gr.LoginButton(min_width=250)
+
+     model_id = HuggingfaceHubSearch(
+         label="Hub Model ID",
+         placeholder="Search for model id on Huggingface",
+         search_type="model",
+     )
+
+     q_method = gr.Dropdown(
+         ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
+         label="Quantization Method",
+         info="GGML quantization type",
+         value="Q4_K_M",
+         filterable=False,
+         visible=True
+     )
+
+     imatrix_q_method = gr.Dropdown(
+         ["IQ3_M", "IQ3_XXS", "Q4_K_M", "Q4_K_S", "IQ4_NL", "IQ4_XS", "Q5_K_M", "Q5_K_S"],
+         label="Imatrix Quantization Method",
+         info="GGML imatrix quants type",
+         value="IQ4_NL",
+         filterable=False,
+         visible=False
+     )
+
+     use_imatrix = gr.Checkbox(
+         value=False,
+         label="Use Imatrix Quantization",
+         info="Use importance matrix for quantization."
+     )
+
+     private_repo = gr.Checkbox(
+         value=False,
+         label="Private Repo",
+         info="Create a private repo under your username."
+     )
+
+     train_data_file = gr.File(
+         label="Training Data File",
+         file_types=["txt"],
+         visible=False
+     )
+
+     split_model = gr.Checkbox(
+         value=False,
+         label="Split Model",
+         info="Shard the model using gguf-split."
+     )
+
+     split_max_tensors = gr.Number(
+         value=256,
+         label="Max Tensors per File",
+         info="Maximum number of tensors per file when splitting model.",
+         visible=False
+     )
+
+     split_max_size = gr.Textbox(
+         label="Max File Size",
+         info="Maximum file size when splitting model (--split-max-size). May leave empty to use the default. Accepted suffixes: M, G. Example: 256M, 5G",
+         visible=False
+     )
+
+     def update_visibility(use_imatrix):
+         return gr.update(visible=not use_imatrix), gr.update(visible=use_imatrix), gr.update(visible=use_imatrix)
+
+     use_imatrix.change(
+         fn=update_visibility,
+         inputs=use_imatrix,
+         outputs=[q_method, imatrix_q_method, train_data_file]
+     )
+
+     iface = gr.Interface(
+         fn=process_model,
+         inputs=[
+             model_id,
+             q_method,
+             use_imatrix,
+             imatrix_q_method,
+             private_repo,
+             train_data_file,
+             split_model,
+             split_max_tensors,
+             split_max_size,
+         ],
+         outputs=[
+             gr.Markdown(label="output"),
+             gr.Image(show_label=False),
+         ],
+         title="Create your own GGUF Quants, blazingly fast ⚡!",
+         description="The space takes an HF repo as an input, quantizes it and creates a Public repo containing the selected quant under your HF user namespace.",
+         api_name=False
+     )
+
+     def update_split_visibility(split_model):
+         return gr.update(visible=split_model), gr.update(visible=split_model)
+
+     split_model.change(
+         fn=update_split_visibility,
+         inputs=split_model,
+         outputs=[split_max_tensors, split_max_size]
+     )
+
+ def restart_space():
+     HfApi().restart_space(repo_id="ggml-org/gguf-my-repo", token=HF_TOKEN, factory_reboot=True)
+
+ scheduler = BackgroundScheduler()
+ scheduler.add_job(restart_space, "interval", seconds=21600)
+ scheduler.start()
+
+ # Launch the interface
+ demo.queue(default_concurrency_limit=1, max_size=5).launch(debug=True, show_api=False)
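For readers who want to reproduce the pipeline outside the Gradio UI, here is a rough bash sketch of the llama.cpp commands that `process_model()` drives via `subprocess`, assuming the binaries built by `start.sh` and an already-downloaded model directory; the paths, model name, and quant type below are placeholders, not values from this commit.

```bash
# Hypothetical paths, mirroring what process_model() runs internally
MODEL_DIR=./downloads/my-model          # directory produced by snapshot_download()
FP16=./outputs/my-model.fp16.gguf
QUANT=./outputs/my-model-q4_k_m.gguf

# 1. Convert the HF checkpoint to an fp16 GGUF
python ./llama.cpp/convert_hf_to_gguf.py "$MODEL_DIR" --outtype f16 --outfile "$FP16"

# 2. (Optional) build an importance matrix from calibration text
./llama.cpp/llama-imatrix -m "$FP16" -f ./llama.cpp/groups_merged.txt -ngl 99 \
    --output-frequency 10 -o ./outputs/imatrix.dat

# 3. Quantize, with or without the imatrix
./llama.cpp/llama-quantize --imatrix ./outputs/imatrix.dat "$FP16" "$QUANT" Q4_K_M

# 4. (Optional) shard a large output before uploading
./llama.cpp/llama-gguf-split --split --split-max-tensors 256 "$QUANT" "${QUANT%.gguf}"
```

The Space then uploads the resulting `.gguf` files (and `imatrix.dat`, if present) to a `<username>/<model>-GGUF` repo via `HfApi.upload_file`.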
docker-compose.yml ADDED
@@ -0,0 +1,16 @@
+ # Docker compose file for LOCAL development
+
+ services:
+   gguf-my-repo:
+     build:
+       context: .
+       dockerfile: Dockerfile
+     image: gguf-my-repo
+     container_name: gguf-my-repo
+     ports:
+       - "7860:7860"
+     volumes:
+       - .:/home/user/app
+     environment:
+       - RUN_LOCALLY=1
+       - HF_TOKEN=${HF_TOKEN}
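Usage, following the README above: log in once with `huggingface-cli login`, then start the stack with the token exported. The `--build` flag is an optional addition to the README command, used here to rebuild the image after local changes.

```bash
# RUN_LOCALLY=1 (set in the compose file) keeps GGML_CUDA=OFF in start.sh
HF_TOKEN=$(cat ~/.cache/huggingface/token) docker compose up --build
```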
dockerignore ADDED
@@ -0,0 +1,3 @@
+ /downloads
+ /llama.cpp
+ /outputs
error.png ADDED
gitattributes ADDED
@@ -0,0 +1,37 @@
+ *.7z filter=lfs diff=lfs merge=lfs -text
+ *.arrow filter=lfs diff=lfs merge=lfs -text
+ *.bin filter=lfs diff=lfs merge=lfs -text
+ *.bz2 filter=lfs diff=lfs merge=lfs -text
+ *.ckpt filter=lfs diff=lfs merge=lfs -text
+ *.ftz filter=lfs diff=lfs merge=lfs -text
+ *.gz filter=lfs diff=lfs merge=lfs -text
+ *.h5 filter=lfs diff=lfs merge=lfs -text
+ *.joblib filter=lfs diff=lfs merge=lfs -text
+ *.lfs.* filter=lfs diff=lfs merge=lfs -text
+ *.mlmodel filter=lfs diff=lfs merge=lfs -text
+ *.model filter=lfs diff=lfs merge=lfs -text
+ *.msgpack filter=lfs diff=lfs merge=lfs -text
+ *.npy filter=lfs diff=lfs merge=lfs -text
+ *.npz filter=lfs diff=lfs merge=lfs -text
+ *.onnx filter=lfs diff=lfs merge=lfs -text
+ *.ot filter=lfs diff=lfs merge=lfs -text
+ *.parquet filter=lfs diff=lfs merge=lfs -text
+ *.pb filter=lfs diff=lfs merge=lfs -text
+ *.pickle filter=lfs diff=lfs merge=lfs -text
+ *.pkl filter=lfs diff=lfs merge=lfs -text
+ *.pt filter=lfs diff=lfs merge=lfs -text
+ *.pth filter=lfs diff=lfs merge=lfs -text
+ *.rar filter=lfs diff=lfs merge=lfs -text
+ *.safetensors filter=lfs diff=lfs merge=lfs -text
+ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
+ *.tar.* filter=lfs diff=lfs merge=lfs -text
+ *.tar filter=lfs diff=lfs merge=lfs -text
+ *.tflite filter=lfs diff=lfs merge=lfs -text
+ *.tgz filter=lfs diff=lfs merge=lfs -text
+ *.wasm filter=lfs diff=lfs merge=lfs -text
+ *.xz filter=lfs diff=lfs merge=lfs -text
+ *.zip filter=lfs diff=lfs merge=lfs -text
+ *.zst filter=lfs diff=lfs merge=lfs -text
+ *tfevents* filter=lfs diff=lfs merge=lfs -text
+ llama.png filter=lfs diff=lfs merge=lfs -text
+ imatrix_calibration.txt filter=lfs diff=lfs merge=lfs -text
gitignore ADDED
@@ -0,0 +1,167 @@
+ # Byte-compiled / optimized / DLL files
+ __pycache__/
+ *.py[cod]
+ *$py.class
+
+ # C extensions
+ *.so
+
+ # Distribution / packaging
+ .Python
+ build/
+ develop-eggs/
+ dist/
+ downloads/
+ eggs/
+ .eggs/
+ lib/
+ lib64/
+ parts/
+ sdist/
+ var/
+ wheels/
+ share/python-wheels/
+ *.egg-info/
+ .installed.cfg
+ *.egg
+ MANIFEST
+
+ # PyInstaller
+ # Usually these files are written by a python script from a template
+ # before PyInstaller builds the exe, so as to inject date/other infos into it.
+ *.manifest
+ *.spec
+
+ # Installer logs
+ pip-log.txt
+ pip-delete-this-directory.txt
+
+ # Unit test / coverage reports
+ htmlcov/
+ .tox/
+ .nox/
+ .coverage
+ .coverage.*
+ .cache
+ nosetests.xml
+ coverage.xml
+ *.cover
+ *.py,cover
+ .hypothesis/
+ .pytest_cache/
+ cover/
+
+ # Translations
+ *.mo
+ *.pot
+
+ # Django stuff:
+ *.log
+ local_settings.py
+ db.sqlite3
+ db.sqlite3-journal
+
+ # Flask stuff:
+ instance/
+ .webassets-cache
+
+ # Scrapy stuff:
+ .scrapy
+
+ # Sphinx documentation
+ docs/_build/
+
+ # PyBuilder
+ .pybuilder/
+ target/
+
+ # Jupyter Notebook
+ .ipynb_checkpoints
+
+ # IPython
+ profile_default/
+ ipython_config.py
+
+ # pyenv
+ # For a library or package, you might want to ignore these files since the code is
+ # intended to run in multiple environments; otherwise, check them in:
+ # .python-version
+
+ # pipenv
+ # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
+ # However, in case of collaboration, if having platform-specific dependencies or dependencies
+ # having no cross-platform support, pipenv may install dependencies that don't work, or not
+ # install all needed dependencies.
+ #Pipfile.lock
+
+ # poetry
+ # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
+ # This is especially recommended for binary packages to ensure reproducibility, and is more
+ # commonly ignored for libraries.
+ # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
+ #poetry.lock
+
+ # pdm
+ # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
+ #pdm.lock
+ # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
+ # in version control.
+ # https://pdm.fming.dev/latest/usage/project/#working-with-version-control
+ .pdm.toml
+ .pdm-python
+ .pdm-build/
+
+ # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
+ __pypackages__/
+
+ # Celery stuff
+ celerybeat-schedule
+ celerybeat.pid
+
+ # SageMath parsed files
+ *.sage.py
+
+ # Environments
+ .env
+ .venv
+ env/
+ venv/
+ ENV/
+ env.bak/
+ venv.bak/
+
+ # Spyder project settings
+ .spyderproject
+ .spyproject
+
+ # Rope project settings
+ .ropeproject
+
+ # mkdocs documentation
+ /site
+
+ # mypy
+ .mypy_cache/
+ .dmypy.json
+ dmypy.json
+
+ # Pyre type checker
+ .pyre/
+
+ # pytype static type analyzer
+ .pytype/
+
+ # Cython debug symbols
+ cython_debug/
+
+ # PyCharm
+ # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
+ # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
+ # and can be added to the global gitignore or merged into this file. For a more nuclear
+ # option (not recommended) you can uncomment the following to ignore the entire idea folder.
+ #.idea/
+
+ /downloads
+ !/downloads/.keep
+ /llama.cpp
+ /outputs
groups_merged.txt ADDED
The diff for this file is too large to render. See raw diff
 
llama.png ADDED
Git LFS Details
  • SHA256: a287a47ae4c6f87a363471130be4c916948664792a7a8efbca1bdaaf8d016ebc
  • Pointer size: 132 Bytes
  • Size of remote file: 1.8 MB
start.sh ADDED
@@ -0,0 +1,21 @@
+ #!/bin/bash
+
+ if [ ! -d "llama.cpp" ]; then
+     # only run in dev env
+     git clone https://github.com/ggerganov/llama.cpp
+ fi
+
+ export GGML_CUDA=OFF
+ if [[ -z "${RUN_LOCALLY}" ]]; then
+     # enable CUDA if NOT running locally
+     export GGML_CUDA=ON
+ fi
+
+ cd llama.cpp
+ cmake -B build -DBUILD_SHARED_LIBS=OFF
+ cmake --build build --config Release -j --target llama-quantize llama-gguf-split llama-imatrix
+ cp ./build/bin/llama-* .
+ rm -rf build
+
+ cd ..
+ python app.py