spawn99 committed
Commit b3bc137 · verified · 1 Parent(s): 3c53b8d

add support for FP16 conversion and fix naming


Addresses #38, #37, and GitHub issue https://github.com/ml-explore/mlx-examples/issues/1320
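For reference, these are the two mlx_lm convert() call shapes the new code path chooses between, as a minimal sketch (the repo id is a placeholder; the parameters follow the diff below):

    from mlx_lm import convert

    # FP16 path: skip quantization and store the weights as float16.
    convert("some-org/some-model", mlx_path="MLX", quantize=False, dtype="float16")

    # Quantized path: the bit width comes from QUANT_PARAMS[q_method] (e.g. Q4 -> 4).
    convert("some-org/some-model", mlx_path="MLX", quantize=True, q_bits=4)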

Files changed (1)
1. app.py +24 -14
app.py CHANGED
@@ -108,15 +108,25 @@ def process_model(model_id, q_method, oauth_token: gr.OAuthToken | None):
     model_name = model_id.split('/')[-1]
     username = whoami(oauth_token.token)["name"]
     try:
-        upload_repo = f"{username}/{model_name}-{q_method}-mlx"
-        print(upload_repo)
-        with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
-            # The target dir must not exist
-            mlx_path = os.path.join(tmpdir, "mlx")
-            convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=QUANT_PARAMS[q_method])
-            print("Conversion done")
-            upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
-            print("Upload done")
+        if q_method == "FP16":
+            upload_repo = f"{username}/{model_name}-MLX-FP16"
+            with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
+                # The target dir must not exist
+                mlx_path = os.path.join(tmpdir, "MLX")
+                convert(model_id, mlx_path=mlx_path, quantize=False, dtype="float16")
+                print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
+                print("Upload done")
+        else:
+            q_bits = QUANT_PARAMS[q_method]
+            upload_repo = f"{username}/{model_name}-MLX-{q_bits}Bit"
+            with tempfile.TemporaryDirectory(dir="converted") as tmpdir:
+                # The target dir must not exist
+                mlx_path = os.path.join(tmpdir, "MLX")
+                convert(model_id, mlx_path=mlx_path, quantize=True, q_bits=q_bits)
+                print("Conversion done")
+                upload_to_hub(path=mlx_path, upload_repo=upload_repo, hf_path=model_id, oauth_token=oauth_token)
+                print("Upload done")
         return (
             f'Find your repo <a href="https://hf.co/{upload_repo}" target="_blank" style="text-decoration:underline">here</a>',
             "llama.png",
@@ -142,9 +152,9 @@ with gr.Blocks(css=css) as demo:
         )

        q_method = gr.Dropdown(
-            ["Q2", "Q3", "Q4", "Q6", "Q8"],
-            label="Quantization Method",
-            info="MLX quantization type",
+            ["FP16", "Q2", "Q3", "Q4", "Q6", "Q8"],
+            label="Conversion Method",
+            info="MLX conversion type (FP16 for float16, Q2–Q8 for quantized models)",
             value="Q4",
             filterable=False,
             visible=True
@@ -160,8 +170,8 @@ with gr.Blocks(css=css) as demo:
             gr.Markdown(label="output"),
             gr.Image(show_label=False),
         ],
-        title="Create your own MLX Quants, blazingly fast ⚡!",
-        description="The space takes an HF repo as an input, quantizes it and creates a Public/ Private repo containing the selected quant under your HF user namespace.",
+        title="Create your own MLX Models, blazingly fast ⚡!",
+        description="The space takes an HF repo as an input, converts it to MLX format (FP16 or quantized), and creates a Public/Private repo under your HF user namespace.",
         api_name=False
     )
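Not shown in this diff but assumed by it: QUANT_PARAMS maps each dropdown choice to a bit width. A plausible reconstruction, inferred from how the new code indexes it (the real dict is defined elsewhere in app.py):

    # Hypothetical; only its shape is implied by QUANT_PARAMS[q_method] above.
    QUANT_PARAMS = {"Q2": 2, "Q3": 3, "Q4": 4, "Q6": 6, "Q8": 8}

With the renaming, a Q4 conversion for a hypothetical user alice and model Mistral-7B-v0.1 now uploads to alice/Mistral-7B-v0.1-MLX-4Bit (previously alice/Mistral-7B-v0.1-Q4-mlx), and the new FP16 path uploads to alice/Mistral-7B-v0.1-MLX-FP16.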