pglo committed
Commit 2c34676 · verified · 1 Parent(s): 32467f2

Upload folder using huggingface_hub

config.json CHANGED
@@ -1,108 +1,124 @@
 {
-  "_name_or_path": "/checkpoints/instruct_tuning7B_InfInstruct0625_andbunchotherdata_UltraChat_batch128_LR2e-6/checkpoint-32701",
+  "adapter_rank": 128,
   "add_bias_linear": false,
-  "architectures": [
-    "Zamba2ForCausalLM"
-  ],
   "attention_dropout": 0.0,
+  "attention_head_dim": 224,
+  "attention_hidden_size": 7168,
   "bos_token_id": 1,
-  "conv_dimension": 4,
+  "chunk_size": 256,
   "eos_token_id": 2,
-  "expansion_factor": 2,
   "ffn_hidden_size": 14336,
-  "ft_lora": false,
-  "gated_linear_unit": true,
+  "hidden_act": "gelu",
   "hidden_size": 3584,
+  "hybrid_layer_ids": [
+    6,
+    11,
+    17,
+    23,
+    29,
+    35,
+    41,
+    47,
+    53,
+    59,
+    65,
+    71,
+    77
+  ],
   "initializer_range": 0.02,
+  "intermediate_size": 14336,
   "kv_channels": 112,
   "layers_block_type": [
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m",
-    "m",
-    "m",
-    "g",
-    "m",
-    "m",
-    "m"
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "mamba",
+    "hybrid",
+    "mamba",
+    "mamba",
+    "mamba"
   ],
-  "lora_rank": 128,
+  "mamba_d_conv": 4,
+  "mamba_d_state": 64,
+  "mamba_expand": 2,
   "mamba_headdim": 64,
   "mamba_ngroups": 2,
   "max_position_embeddings": 4096,
   "model_type": "zamba2",
+  "n_mamba_heads": 112,
   "num_attention_heads": 32,
   "num_hidden_layers": 81,
   "num_key_value_heads": 32,
@@ -112,15 +128,16 @@
   "pad_token_id": 0,
   "rms_norm_eps": 1e-05,
   "rope_theta": 10000,
-  "sliding_window": null,
-  "state_size": 64,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.43.0.dev0",
-  "use_cache": false,
-  "use_mamba_kernels": true,
-  "use_mem_eff_path": false,
+  "time_step_floor": 0.0001,
+  "time_step_limit": null,
+  "time_step_max": 0.1,
+  "time_step_min": 0.001,
+  "transformers_version": "4.49.0.dev0",
+  "use_cache": true,
+  "use_conv_bias": true,
+  "use_long_context": false,
   "use_mem_rope": true,
-  "use_shared_attention_lora": false,
-  "use_shared_block_lora": true,
+  "use_shared_attention_adapter": false,
+  "use_shared_mlp_adapter": true,
   "vocab_size": 32000
 }
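
For context, the updated config keeps the same architecture shape (81 layers, hidden_size 3584) but renames the repo-specific keys to the schema used by the upstream Zamba2 integration in transformers ("conv_dimension" → "mamba_d_conv", "expansion_factor" → "mamba_expand", "state_size" → "mamba_d_state", "lora_rank" → "adapter_rank", and the "m"/"g" layer codes → "mamba"/"hybrid"), with "transformers_version" bumped from 4.43.0.dev0 to 4.49.0.dev0. The sketch below is only an illustration of how the updated file can be sanity-checked; the local path is a placeholder, not something defined by this commit.

import json

# Placeholder path: point this at a local checkout of this repository.
with open("path/to/repo/config.json") as f:
    cfg = json.load(f)

print(cfg["model_type"])            # "zamba2"
print(cfg["transformers_version"])  # "4.49.0.dev0"
print(cfg["hybrid_layer_ids"])      # [6, 11, 17, 23, 29, 35, 41, 47, 53, 59, 65, 71, 77]

# The 13 "hybrid" entries in layers_block_type should sit exactly at the
# indices listed in hybrid_layer_ids; every other layer is "mamba".
hybrid_positions = [i for i, t in enumerate(cfg["layers_block_type"]) if t == "hybrid"]
assert hybrid_positions == cfg["hybrid_layer_ids"]

With a transformers build at least as new as the recorded 4.49.0.dev0, the checkpoint is expected to load through the standard AutoConfig/AutoModelForCausalLM entry points via model_type "zamba2"; that expectation comes from the config contents, not from anything verified in this commit.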
model-00001-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:78d23a615a9a98740ce3a637b69f81197f18072d1e054eef2071e789dc73fbaa
+size 4917530344
model-00002-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:6710b924149889e84a492561689063373ee0a309678eb7a571a1305d9e6292ad
+size 4968640128
model-00003-of-00003.safetensors ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:c803c033a051563856a62605904b1fae7287962e061c94349a4ab01513a44fa3
+size 4934768840
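
The three shards are stored through Git LFS, so the diff only shows pointer files (spec version, sha256 oid, byte size). A small, purely illustrative Python check against those recorded values is sketched below; the file paths are assumed to be local downloads of the shards.

import hashlib
import os

# Expected sha256/size values copied from the LFS pointers above.
EXPECTED = {
    "model-00001-of-00003.safetensors": ("78d23a615a9a98740ce3a637b69f81197f18072d1e054eef2071e789dc73fbaa", 4917530344),
    "model-00002-of-00003.safetensors": ("6710b924149889e84a492561689063373ee0a309678eb7a571a1305d9e6292ad", 4968640128),
    "model-00003-of-00003.safetensors": ("c803c033a051563856a62605904b1fae7287962e061c94349a4ab01513a44fa3", 4934768840),
}

def verify_shard(path):
    """Return True if a downloaded shard matches its LFS pointer metadata."""
    oid, size = EXPECTED[os.path.basename(path)]
    if os.path.getsize(path) != size:
        return False
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            digest.update(chunk)
    return digest.hexdigest() == oid

# Example usage (hypothetical local file):
# print(verify_shard("model-00001-of-00003.safetensors"))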
model.safetensors.index.json CHANGED
The diff for this file is too large to render. See raw diff
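
Since the model.safetensors.index.json diff is too large to render here, the following sketch shows how its contents are typically inspected; it assumes the standard sharded-safetensors layout, where a "weight_map" object maps each tensor name to one of the three shard files above, and uses a placeholder local path.

import json
from collections import Counter

# Placeholder path: a local checkout of this repository.
with open("path/to/repo/model.safetensors.index.json") as f:
    index = json.load(f)

# Count how many tensors each shard holds according to the weight map.
per_shard = Counter(index["weight_map"].values())
for shard, count in sorted(per_shard.items()):
    print(f"{shard}: {count} tensors")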