|
{ |
|
"metadata": { |
|
"ParamSize": 485, |
|
"ParamBytes": 18982125568.0, |
|
"BitsPerParam": 4.500270809152499 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131072000, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_weight", |
|
"shape": [ |
|
32000, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c96449d93b885c3cca6eec8e85cb078d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18210afa39347362990d1a4885447f48" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "137cd7d5a5d6af01a64651ef7896bf1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 27705344, |
|
"records": [ |
|
{ |
|
"name": "lm_head.q_scale", |
|
"shape": [ |
|
32000, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384000, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16384000 |
|
}, |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 16400384 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27672576 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27688960 |
|
} |
|
], |
|
"md5sum": "6523170eeb13eee713f92c668259f278" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3dabd5266718dc5811e95e7d0c4cca03" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be88eea5f85a643717879f355082034c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b20b3e767f88db4b3da64770d4538a69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fbf88fb466b259a213c15db28ca45ebb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d485cfb1065fecba48b3c48bf03a895" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d4af812ffe8e6c0e12d79cff36e37bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e60608f388b014192a2c2437d40bb48" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "caba8b98557adc848e00263cc90b0e80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "9fac2b7c3fd87c0948543fa53f0dd565" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bca6dc7925da6c766bab2d88f6a96b89" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cbe2a97df331c2680c8dfea7d8349a38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f746568ad2413fed0239807f9926dbd6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3cbf6d702864232bc12dc99122e585a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b0c1dd3114836c1400056540132c51c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be4aaeb692e77bc6adc2755c494ec6a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ca389f0178eeeb20d6380ae7b93b608" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "830fa58056ba82e68b4034bd199f0692" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "caafaf6609f0586b0259d39d846c0748" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b3de49a730d577f2b420b4ef9b8abb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c0d5991b422c843fe869faec8034813" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a571cebf29b5f12480e59d1237d84a4d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26000ff012178d5cfbc502c4c493d959" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22bd0619a4712e78e34b744d5211b535" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e48fe0462139f13cb8d3d238e5c3f15c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81e2f141700ecde5bbe740929fcfa358" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "fec67841bc09b6fd632d6d700f81c4d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e4d76493d10368da7ffca304b3e18c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e54b0c430ac589bd15d6fb7a54cd2e04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cea36f4e3b84506d6e2a545ead641bbc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6fca9abc03e10ee897ecb890d3c67d9f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ed5da70a5601de4978aff857be4900d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2100c79728b2f724e3a348bc45f5483f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 131072000, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_weight", |
|
"shape": [ |
|
32000, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 131072000, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff12f405870a53612c3fed46712ffd96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "a8be076bc890a743b7b3d6e283fb04d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f47242a36754030eabe2e38a63497616" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a9c06fd0718bd363e073d0c7ad0410a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "401e8a10ffec246d6eb569d102521894" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37f7db7d96a883c6a9458cda8fd214ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76c3e74c65692e4e6b1f9edb1ebda5e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32931840, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.q_scale", |
|
"shape": [ |
|
32000, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384000, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 16384000 |
|
}, |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 16400384 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 27672576 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 27688960 |
|
} |
|
], |
|
"md5sum": "f2c24146b5e022d5bb7e41adab47c0d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f97e0daba8f368bf22a00f13ea3f02ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "826fbfd9fc45ea9b1213a2b333a5419b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0765065c8246a79ad840d367d9ddc84c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "826bd2bb5b9bf6dcc155c5588ee26323" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "abc3eb0c352bcba64a01b7a7bdf89e63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5fdee621ad002ba92b5294009af858aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24952832, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 4194304 |
|
}, |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 4210688 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 15482880 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 15499264 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 24936448 |
|
} |
|
], |
|
"md5sum": "bd7632f683edd91dac2873f90fe29089" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "28e1ed93128592371f12763577c66ff2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cebf91c65a7368d8f85b5808966b96ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf6a931d753387a032005149a5c569e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f6bc015960bd804a11255830158b06f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f71a9ce2769e5af3fa179df2ce85e00" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "661d7158a1c717c9a403487fbf000658" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bce51f9b49907b546205b284b8f323d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76bde5d49919ebf5b1a27fe309c21935" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "f0061ca0ed03a1cde63910b9c039a402" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37e26dddc9c1aca4aee38a072ffcc819" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c12b9f4d9a9aeccff1a080401e45d26e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "92f9fd19ea1ff55e5df31efe355da201" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "29ccc1fcbd984a9e03ea16c565f35115" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0cc1ba01e57d268680fb159f6d46b4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f13afe90d1bedb96dd3db7f4d3f455a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e45a31c5abb7531a45a6cb4485b66e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "de7710dc304b010afc026a2b280cdaaa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d0edb25c027d5cfe270d9000287e6d74" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "532ef17eaa16d6ba260eb78eac0db8e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3401f2cbc09c6c19aef34b234df40567" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "808f2e6a2f7306f9d4c8c49c40243f94" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d84e04de2c0a19d97d0ff2d1858f64c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ba40c0cddff459511ff2c632ad5dea71" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50ec1fb8e16ba98f3de3422ce328da8a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9d8d6ce640f271ac1fe8980c2860c80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ea9f6a07b91b268b66902292b0aff8be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30162944 |
|
} |
|
], |
|
"md5sum": "d568fa422909a1b87b411e8c5af3b3b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3aef7128df8b6cc3246be99922958cdb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6240a9650f1dfd466c35765872d0dabf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d049571e076161c364ddbdb6e96570a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97b5191ce6991f30d9706dce86451f7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e66e42482b976ac809c2f3d08faac5a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5878b1ac40cb9f9f02481d272dd01fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2106be5e5726efcaee4a5ddd2c8b3bc3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a861b4f51b004742b8e1f26b6717611c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "26d57dca8c98519b502f1ac5486d1fa7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c741bafae3f9479d54c6c3423b7f6ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1725609b23313af770120848088199e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a6178693b8e1a830ad3553cbb63c925f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0ae68e87f0adc86abc975a809cb03ba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ff3a48e83435d7f27e2f7c15d97c5d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3cc4fe6c1d41cd15f3966e095f42e2ff" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e49cb7ccff8a5f00c148825763faff2b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e8f26834b813ce31443f9533edc8c93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b4188949134de0ac2af14a99ccd4ea0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
} |
|
], |
|
"md5sum": "2fccd591a5055cadccb90a2c69645fca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4475bf55ab545412d72f48c563e4a022" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f71dce5b8983de5804423c06c7b49f42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d331825c224faf7b70c5d39618a22115" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f6193dd1f3a4cffcc166788a16631e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e91cb55410646bb4253a9b534611536" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c82d59042d68ab67c5ced334cc78d77" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "449716892c836a14bab96524f1f566ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01733026a886e71fe149251e2638b63e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86680f1e968043a9794cd9813c5253ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6147fbd18f6c6784ff3e2adb67fde29d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce6dcdf157938e7b2591aa040a72ae42" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "222b02ac1ff05b64ca875dfa185f482d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5c5fe8048fc4899af2ee54300a77d69" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15a5da764f2beb7c507c61065c98b42b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "9c2cbfce4fde7a277f9d571141b6f704" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a91f411effd1ee5cb458f7604bf140f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e71d439762b29b35333dbaef974e055" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f509dc37d32b404ec74698ebafe2b7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4b34a96d27990e193cbe62be29df058" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1c2eb4ef7e9e8ff79a4bdde86ce71e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "49e3463252a2b7289d475a9bc942d791" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "246f54851bf87e12beca49cd53937ec4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0e9921155d43f257ad4580a8dc647c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae2e66d7ad47c6d133a48c4ab45c33b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e66e9717a57d0448977c429e5f2f83fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "66f935e7269e7ada03b9351d587c4ce8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02339b64dca141be59e5166a3110fd81" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20d379fe2fe6b116a6cadd2ab314a5a3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "995924ab69271408483273cd29d46bbe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f9cae2084e396bbd2618245fc30ae734" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "ba6f7c965b1a1115854c24dc25db43c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d654ef91a0eb6c6238c7033560c9d7be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7b369d29466585849223685285f5d13a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcc58f1247be94d595e46dab775dc48d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fafd0fba2c98c030bac5c7bc16abd460" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60238604147b2e72b9365baf4af816ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4308871c8cabc9ba8f9f3481bcdc0aa3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f07dd65bd455ed66272a2e97d4b5ba0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "383a45c8d906d123529ba38141a9782b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fa9e11c362829e2fb77aefaaa98f54f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6dde83501aa6e959983f1f933c96f0bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9e336484ea87129b279d8ff0708abed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67aed81f6db82a62c4e3d7152992784f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2a26cdfbb4b38e296c143fd847b8f5ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3234d550261dbf3b84a17b5593f1031" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8a22d55a9e9857eb2117cca617ea5873" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb38582f07846146030ae98e5c71f0dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "f9934396fef0f520effdf890f375b26a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3acd27e7db33b945e600b7f00c310327" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5628aca32d4e9514b43b4ce289e450fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b6c55ded52849980df8294614f0dc41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff09726320402d5043a9f23a17045501" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef8754148dc0b7cac3f18ef26a694e64" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f49397a90cd8b574c0d89e05e542826e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f578fdb7fb028b21dabc94d5f7f5b8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0f3dd5f7b11fd72c5b31598830978e60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "83d76e27e82200b7de15956e7141088c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
} |
|
], |
|
"md5sum": "b8895300d8d7d08718b67805d1317566" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6e2c38de1000096ee8ecfc5f4ca63c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9cfd9f169b3d6a0d64c1e5dfab655df6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2f67a58cfa2eecb95ba5276631007ac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f88c051df1e772fb9562746e3e084497" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36ff869b7c9aeaeee8f631218d1661fe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c67b1d8404a390523e1ff4d5431944d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "d09eb50d4dec6ee5c475cc7a2913dcee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a1e240824322556ceb78ad8ddbba0f9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "85532db1d109d1523d953e14657503a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cdb2d068f223045311065739ff0a8ef7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee7861c55c882b6fd904223ab1e0a4c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0877e8d7d33a776e6dc99940a2449219" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45b1f274b264a13fce538fcb00db939b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "81b2e482883c538c6806ff6e6842d2ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "bb2d163f3526ece904d493267e224656" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7af046ebda19b92926d36511f4d19af" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "815c993b7d23ebf5f216213aaaec9926" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7cbf0a579c6d62ff6f84341bb279a48a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a86713361c44ead1739cfe7e5cb489c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df81de1198533ccd9cfb9dcfc7950ef5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c75b260d6e7f709d1d5a87b1a50ed30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4030671c959d0c121c13d006aaa90732" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf539ee4f5d213bdb8a42dc32e0c1b1b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "55c10e044c74eecda5e9fda765fcd137" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "116c29a55283f664bf436869071ece5d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79524ea0c52a5d1734214ef880c52c79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4418a7aff867256a900d099589b65f27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51c65267555e30224453cd5acade12f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8dc08eaeacf6b4c499f187a3d658374" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "775ee5c8dc518ddccd11cfb0ff0b76fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c970f015140bf250cc5c7d5037744b96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "088dcfd231761c99d9730a96cd9c0d3a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cd47e2b3c339a06318150605ac4b24c8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d085ccf04e00ed42b9c04c569b78c403" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7f7968deffd5672a1c294676d4f26a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3946fbc27e8005f3cb48989a745db335" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8956354d5561d2e1ce44ab61d933b902" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a321fa71309fd6d34efc41c487084f6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d101b132dd87fc957fd051622ce097f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c360b7b89db02027e6adcd018e9a1a63" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3bae2293d52c7b0a14811b126ecea782" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30179328, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30162944 |
|
} |
|
], |
|
"md5sum": "fbd3a13ffaceba3a366180c3f808a07b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1612d85c08ef1f8a28593fc0ef79451d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c87d190a0690eaf42ba3c361bc0c8c2c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6c5294e7a3199f4b1372b9ff1f2abab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3083e007a670a474fd6b7cc315f1c233" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1a04e6e115c0c5f6df45be2f91744573" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "762fdfe8d350bdf1a485ef7a2d12dcba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 11304960 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 22577152 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 22593536 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 27836416 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "b5e4d40acc7ebbef4d4b09e405974a78" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1e41d87d99c75f8e1028a19a0af77cc8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7e9b5db8f598f9a3bd5cdc83202a08b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7cfd56982268b27929ddb2bb4739e8f5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb1db489a6e70f4056a7d40f1067551f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a40e35d8150d314f620a39d246fccd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02854293ce7085991c11060fc036a21f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57a894993f047e5872d4c37db7f47e57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e2719bd9e91c96c1e24a2c3c6ee4900" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "14b177b9f81410a1af427e49a47172d0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b91482c23505d16bf475ca35ebb7c075" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d184cec2242f5c9d019fe46bd94c2b6f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fd63c16cd4f6513b3f0ae39a0a94a17c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "17ea242ae9ca73bf4e1511c5483c9ab6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68a55ab2dc9be7ef796f989da819da2f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5e4aea4c735f17c8b4f326ac9f116973" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "718b67010cb1a9edac808525ab369312" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "a13e1eb6af38925274a2e7b430bca974" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd7a50fe6cb70742d93c984faef7a93b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50ff3d181add844ac4029571d1a3d2b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c6137937dab72605cb93047e3c85e6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86aaba947aa99b513743d15486f393b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4889738b537dbda22e97b2b792c8c308" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f0a27dc13c38c2a55cbc0256ab9a08e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbbfb5936db25a820a05e734c1c4ef95" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76370143bbf95a19f1ac2fd548dbcd2a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "683a9eab8c6a901bfad4185ea8ec3b85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46b6c23c4e275558909a7575af6d4c57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f458737f096be1b13a020d3c8f7007c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a74ab6a1ccc16a9c501086a4dc30d587" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31981568, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 9437184 |
|
} |
|
], |
|
"md5sum": "16a56b5e321ce1442ab08ffa8b3ba67a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "56b937b8c191de62cb14045715efe514" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a3007a85c28ba9bca517baa23ebcc42e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b47dd26511bffcf140a376283df94a29" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d283ee45659ce5b8aa26514c86ed949" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e917aa8fa8790e490eca2b799db63dc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2151da0b1f334569cfb5ac9435d926a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32047104, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20758528 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32030720 |
|
} |
|
], |
|
"md5sum": "ec604c2e5c30b21b319e755b271503d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b22768e15f4c755069efc98d1f76b7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_242.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7103170fd39ed7fc40798b1a68d0acc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_243.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "45eb90cb9642bcf86b6148b6ed1a7ab6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_244.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad24d983b3b2a59283dda6cfb23ca2ae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_245.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97de0607dad855504a19ca3587da2076" |
|
}, |
|
{ |
|
"dataPath": "params_shard_246.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c5c18a7da81ae41db3c201f1e52e6fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_247.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a137bfc6dcc622b5b83eedfa3c5d8d43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_248.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "19b6049af7febf83b8e03616df638eac" |
|
}, |
|
{ |
|
"dataPath": "params_shard_249.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8f011ec2647a8ef6293fc06ea5fdf379" |
|
}, |
|
{ |
|
"dataPath": "params_shard_250.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3773b65084e7db6d7bc4c1d3551b85e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_251.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1a877a6f724823a85e81f2b8ba72b4e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_252.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a412551f3313c29083c148f15046aad1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_253.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc77fb840eda053fa4df27cf261a6ed3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_254.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f83fb349189af73b18c624d61777174" |
|
}, |
|
{ |
|
"dataPath": "params_shard_255.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4636e963ba0b9ff6e38f2ea7a2bfb650" |
|
}, |
|
{ |
|
"dataPath": "params_shard_256.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ebf2000696952b5bc24d7f6f4d321fd7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_257.bin", |
|
"format": "raw-shard", |
|
"nbytes": 32030720, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 32014336 |
|
} |
|
], |
|
"md5sum": "65813b1ddf1b067a2bd895500229c495" |
|
}, |
|
{ |
|
"dataPath": "params_shard_258.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "313e0d19eb18d889da005cf54866db49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_259.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac147664ab98a1d629fbc862e4a1837a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_260.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "652db0f738cec03e48bf1cf28e7c6ee7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_261.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "591b895c6a670624be4966312afd233e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_262.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14a9078575d81b2ebf207edadb47cea3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_263.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e02ae17ec302c0ba9f32ec27080d9b7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_264.bin", |
|
"format": "raw-shard", |
|
"nbytes": 90177536, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
2752 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 90177536, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aaa0a3b3b6adae9e073a736ffa37b741" |
|
}, |
|
{ |
|
"dataPath": "params_shard_265.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30195712, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 5242880 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 9437184 |
|
}, |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 9453568 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20742144 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25985024 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 30179328 |
|
} |
|
], |
|
"md5sum": "c7910f61e29afe6038df8c7dd079ebe9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_266.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9a0fcf882615dc56e08c69a32b54485" |
|
}, |
|
{ |
|
"dataPath": "params_shard_267.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "de3bc25b745ef5517c70214906889a15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_268.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8b3ef2dcaadd43e54f66673f7aba66bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_269.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8901f98c8e908fb726303f312bb37730" |
|
}, |
|
{ |
|
"dataPath": "params_shard_270.bin", |
|
"format": "raw-shard", |
|
"nbytes": 180355072, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_weight", |
|
"shape": [ |
|
44032, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 180355072, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b691a80944ae116e58484ee87030f541" |
|
}, |
|
{ |
|
"dataPath": "params_shard_271.bin", |
|
"format": "raw-shard", |
|
"nbytes": 22544384, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.q_scale", |
|
"shape": [ |
|
44032, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 22544384, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e4769ed57d25f5314ebf78bb43b53bf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_272.bin", |
|
"format": "raw-shard", |
|
"nbytes": 41943040, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.q_weight", |
|
"shape": [ |
|
10240, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 41943040, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "659aa4ad747403837c9c8d3d3c219211" |
|
}, |
|
{ |
|
"dataPath": "params_shard_273.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33554432, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_weight", |
|
"shape": [ |
|
8192, |
|
1024 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33554432, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a0f6a82dc2ca131b875a967fc557cb9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_274.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30162944, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
688 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 11272192, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
8192 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 16384, |
|
"byteOffset": 11272192 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 11288576 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 16531456 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.qkv_proj.q_scale", |
|
"shape": [ |
|
10240, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 5242880, |
|
"byteOffset": 20725760 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.q_scale", |
|
"shape": [ |
|
8192, |
|
256 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 4194304, |
|
"byteOffset": 25968640 |
|
} |
|
], |
|
"md5sum": "bb524a032aeb9d14e85f198917ac6cf4" |
|
} |
|
] |
|
} |