Files changed (1)
  1. app.py +34 -11
app.py CHANGED
@@ -11,25 +11,41 @@ from huggingface_hub import ModelCard
 
 from textwrap import dedent
 
-api = HfApi()
+LLAMA_LIKE_ARCHS = ["MistralForCausalLM", "LlamaForCausalLM"]
+
+def script_to_use(model_id, api):
+    info = api.model_info(model_id)
+    if info.config is None:
+        return None
+    arch = info.config.get("architectures", None)
+    if arch is None:
+        return None
+    arch = arch[0]
+    return "convert.py" if arch in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"
 
 def process_model(model_id, q_method, hf_token):
-
     MODEL_NAME = model_id.split('/')[-1]
     fp16 = f"{MODEL_NAME}/{MODEL_NAME.lower()}.fp16.bin"
+
+    api = HfApi(token=hf_token)
 
     username = whoami(hf_token)["name"]
-
+
     snapshot_download(repo_id=model_id, local_dir=f"{MODEL_NAME}", local_dir_use_symlinks=False)
     print("Model downloaded successfully!")
 
-    fp16_conversion = f"python llama.cpp/convert.py {MODEL_NAME} --outtype f16 --outfile {fp16}"
-    subprocess.run(fp16_conversion, shell=True)
+    conversion_script = script_to_use(model_id, api)
+    fp16_conversion = f"python llama.cpp/{conversion_script} {MODEL_NAME} --outtype f16 --outfile {fp16}"
+    result = subprocess.run(fp16_conversion, shell=True, capture_output=True)
+    if result.returncode != 0:
+        return (f"Error converting to fp16: {result.stderr}", "error.png")
     print("Model converted to fp16 successfully!")
 
     qtype = f"{MODEL_NAME}/{MODEL_NAME.lower()}.{q_method.upper()}.gguf"
     quantise_ggml = f"./llama.cpp/quantize {fp16} {qtype} {q_method}"
-    subprocess.run(quantise_ggml, shell=True)
+    result = subprocess.run(quantise_ggml, shell=True, capture_output=True)
+    if result.returncode != 0:
+        return (f"Error quantizing: {result.stderr}", "error.png")
     print("Quantised successfully!")
 
     # Create empty repo
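For context on the new `script_to_use` helper: llama.cpp's `convert.py` handles Llama-family checkpoints, so the app now chooses between it and the more general `convert-hf-to-gguf.py` by reading the `architectures` field that `HfApi.model_info` surfaces from the repo's `config.json`. A minimal standalone sketch of that lookup (the repo ID is just an example):

```python
from huggingface_hub import HfApi

LLAMA_LIKE_ARCHS = ["MistralForCausalLM", "LlamaForCausalLM"]

def script_to_use(model_id, api):
    # model_info(...).config mirrors the repo's config.json; it can be
    # absent for non-transformers repos, hence the None guards.
    info = api.model_info(model_id)
    if info.config is None:
        return None
    arch = info.config.get("architectures", None)
    if arch is None:
        return None
    # config.json stores a list of class names; the first one decides.
    return "convert.py" if arch[0] in LLAMA_LIKE_ARCHS else "convert-hf-to-gguf.py"

api = HfApi()
print(script_to_use("TinyLlama/TinyLlama-1.1B-Chat-v1.0", api))  # expect "convert.py"
```

One adjacent detail: with `capture_output=True` alone, `result.stderr` in the new error branches is `bytes` (it renders as `b'...'` inside the f-string); adding `text=True` to the `subprocess.run` calls would decode it to a plain string.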
@@ -40,8 +56,7 @@ def process_model(model_id, q_method, hf_token):
         exist_ok=True,
         token=hf_token
     )
-    print("Empty repo created successfully!")
-
+    print("Repo created successfully!")
 
     card = ModelCard.load(model_id)
     card.data.tags = ["llama-cpp"] if card.data.tags is None else card.data.tags + ["llama-cpp"]
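The hunk only shows the tail of the repo-creation call; the elided context presumably creates the target repo and later uploads the quantised GGUF. A hedged sketch of that pattern with `huggingface_hub` — the repo naming and the `upload_file` step are assumptions about the elided code, not taken from the diff:

```python
from huggingface_hub import create_repo, upload_file

# Hypothetical stand-ins for the app's runtime variables.
username, model_name, q_method = "someuser", "TinyLlama-1.1B-Chat-v1.0", "Q4_K_M"
hf_token = "hf_..."  # a write token
qtype = f"{model_name}/{model_name.lower()}.{q_method}.gguf"

# exist_ok=True makes reruns idempotent instead of failing on the second pass.
repo_url = create_repo(
    repo_id=f"{username}/{model_name}-{q_method}-GGUF",  # naming is a guess
    exist_ok=True,
    token=hf_token,
)

upload_file(
    path_or_fileobj=qtype,
    path_in_repo=qtype.split("/")[-1],
    repo_id=repo_url.repo_id,
    token=hf_token,
)
```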
@@ -59,6 +74,10 @@ def process_model(model_id, q_method, hf_token):
     ```bash
     llama-cli --hf-repo {repo_id} --model {qtype.split("/")[-1]} -p "The meaning to life and the universe is "
     ```
+
+    ```bash
+    llama-server --hf-repo {repo_id} --model {qtype.split("/")[-1]} -c 2048
+    ```
     """
     )
     card.save(os.path.join(MODEL_NAME, "README-new.md"))
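The card edits follow `huggingface_hub`'s load-modify-save pattern for `ModelCard`. A compact standalone sketch (the repo ID and usage text are illustrative):

```python
from textwrap import dedent
from huggingface_hub import ModelCard

model_id = "TinyLlama/TinyLlama-1.1B-Chat-v1.0"  # illustrative
card = ModelCard.load(model_id)

# Equivalent to the diff's tag merge: tags may be None on a fresh card.
card.data.tags = (card.data.tags or []) + ["llama-cpp"]

# card.text is the Markdown body below the YAML front matter.
card.text = dedent("""
    # Usage with llama.cpp
    ...
""")
card.save("README-new.md")
```

Saving to `README-new.md` rather than `README.md` presumably keeps the original card intact inside the downloaded snapshot directory.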
@@ -93,17 +112,21 @@ iface = gr.Interface(
         gr.Textbox(
             lines=1,
             label="Hub Model ID",
-            info="Model repo ID"
+            info="Model repo ID",
+            placeholder="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
+            value="TinyLlama/TinyLlama-1.1B-Chat-v1.0"
         ),
         gr.Dropdown(
             ["Q2_K", "Q3_K_S", "Q3_K_M", "Q3_K_L", "Q4_0", "Q4_K_S", "Q4_K_M", "Q5_0", "Q5_K_S", "Q5_K_M", "Q6_K", "Q8_0"],
             label="Quantization Method",
-            info="GGML quantisation type"
+            info="GGML quantisation type",
+            value="Q4_K_M",
         ),
         gr.Textbox(
             lines=1,
             label="HF Write Token",
-            info="https://hf.co/settings/token"
+            info="https://hf.co/settings/token",
+            type="password",
         )
     ],
     outputs=[
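To try the new input defaults in isolation, here is a minimal runnable `gr.Interface` with the same three components wired to a stub in place of the real `process_model` (the abbreviated dropdown list and the output component are placeholders):

```python
import gradio as gr

def process_model_stub(model_id, q_method, hf_token):
    # Stand-in for the real process_model; just echoes the selection.
    return f"Would quantise {model_id} as {q_method}"

iface = gr.Interface(
    fn=process_model_stub,
    inputs=[
        gr.Textbox(
            lines=1,
            label="Hub Model ID",
            info="Model repo ID",
            placeholder="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
            value="TinyLlama/TinyLlama-1.1B-Chat-v1.0",
        ),
        gr.Dropdown(
            ["Q2_K", "Q4_K_M", "Q8_0"],  # abbreviated list
            label="Quantization Method",
            info="GGML quantisation type",
            value="Q4_K_M",
        ),
        # type="password" masks the token as the user types it.
        gr.Textbox(lines=1, label="HF Write Token",
                   info="https://hf.co/settings/token", type="password"),
    ],
    outputs=gr.Textbox(label="Status"),
)

if __name__ == "__main__":
    iface.launch()
```

The `value=` arguments pre-fill each field so a first-time visitor can hit Submit immediately, and `type="password"` keeps the write token masked in the UI.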
 