Upload folder using huggingface_hub

#1
by schroneko - opened
Files changed (4)
  1. README.md +9 -9
  2. config.json +10 -11
  3. model.safetensors +2 -2
  4. model.safetensors.index.json +0 -0
README.md CHANGED
@@ -1,14 +1,14 @@
1
  ---
2
- base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-7B
3
  tags:
4
  - mlx
5
  ---
6
 
7
- # mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit
8
 
9
- The Model [mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit](https://huggingface.co/mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit) was
10
- converted to MLX format from [deepseek-ai/DeepSeek-R1-Distill-Qwen-7B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-7B)
11
- using mlx-lm version **0.21.1**.
12
 
13
  ## Use with mlx
14
 
@@ -19,14 +19,14 @@ pip install mlx-lm
19
  ```python
20
  from mlx_lm import load, generate
21
 
22
- model, tokenizer = load("mlx-community/DeepSeek-R1-Distill-Qwen-7B-4bit")
23
 
24
- prompt = "hello"
25
 
26
- if tokenizer.chat_template is not None:
27
      messages = [{"role": "user", "content": prompt}]
28
      prompt = tokenizer.apply_chat_template(
29
-         messages, add_generation_prompt=True
30
      )
31
 
32
  response = generate(model, tokenizer, prompt=prompt, verbose=True)
 
1
  ---
2
+ base_model: deepseek-ai/DeepSeek-R1-Distill-Qwen-32B
3
  tags:
4
  - mlx
5
  ---
6
 
7
+ # mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit
8
 
9
+ The Model [mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit](https://huggingface.co/mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit) was
10
+ converted to MLX format from [deepseek-ai/DeepSeek-R1-Distill-Qwen-32B](https://huggingface.co/deepseek-ai/DeepSeek-R1-Distill-Qwen-32B)
11
+ using mlx-lm version **0.20.2**.
12
 
13
  ## Use with mlx
14
 
 
19
  ```python
20
  from mlx_lm import load, generate
21
 
22
+ model, tokenizer = load("mlx-community/DeepSeek-R1-Distill-Qwen-32B-4bit")
23
 
24
+ prompt="hello"
25
 
26
+ if hasattr(tokenizer, "apply_chat_template") and tokenizer.chat_template is not None:
27
      messages = [{"role": "user", "content": prompt}]
28
      prompt = tokenizer.apply_chat_template(
29
+         messages, tokenize=False, add_generation_prompt=True
30
      )
31
 
32
  response = generate(model, tokenizer, prompt=prompt, verbose=True)
config.json CHANGED
@@ -6,15 +6,15 @@
6
  "bos_token_id": 151643,
7
  "eos_token_id": 151643,
8
  "hidden_act": "silu",
9
- "hidden_size": 3584,
10
  "initializer_range": 0.02,
11
- "intermediate_size": 18944,
12
  "max_position_embeddings": 131072,
13
- "max_window_layers": 28,
14
  "model_type": "qwen2",
15
- "num_attention_heads": 28,
16
- "num_hidden_layers": 28,
17
- "num_key_value_heads": 4,
18
  "quantization": {
19
  "group_size": 64,
20
  "bits": 4
@@ -23,14 +23,13 @@
23
  "group_size": 64,
24
  "bits": 4
25
  },
26
- "rms_norm_eps": 1e-06,
27
- "rope_theta": 10000,
28
- "sliding_window": 4096,
29
  "tie_word_embeddings": false,
30
  "torch_dtype": "bfloat16",
31
- "transformers_version": "4.44.0",
32
  "use_cache": true,
33
- "use_mrope": false,
34
  "use_sliding_window": false,
35
  "vocab_size": 152064
36
  }
 
6
  "bos_token_id": 151643,
7
  "eos_token_id": 151643,
8
  "hidden_act": "silu",
9
+ "hidden_size": 5120,
10
  "initializer_range": 0.02,
11
+ "intermediate_size": 27648,
12
  "max_position_embeddings": 131072,
13
+ "max_window_layers": 64,
14
  "model_type": "qwen2",
15
+ "num_attention_heads": 40,
16
+ "num_hidden_layers": 64,
17
+ "num_key_value_heads": 8,
18
  "quantization": {
19
  "group_size": 64,
20
  "bits": 4
 
23
  "group_size": 64,
24
  "bits": 4
25
  },
26
+ "rms_norm_eps": 1e-05,
27
+ "rope_theta": 1000000.0,
28
+ "sliding_window": 131072,
29
  "tie_word_embeddings": false,
30
  "torch_dtype": "bfloat16",
31
+ "transformers_version": "4.43.1",
32
  "use_cache": true,
 
33
  "use_sliding_window": false,
34
  "vocab_size": 152064
35
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:75ed9ed7ad66720041dd186dd0d8ce8a2dd8f487ea3f45a4b8fcaa7bbbf1f64f
3
- size 4284346255
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3985d69b00a8f8af6f040fd4e05ab95fc6c0a51e739f37864c29756d5b6e9e2b
3
+ size 4284346187
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff