diff --git "a/ndarray-cache.json" "b/ndarray-cache.json" new file mode 100644--- /dev/null +++ "b/ndarray-cache.json" @@ -0,0 +1,11359 @@ +{ + "metadata": { + "ParamSize": 965, + "ParamBytes": 3075119104.0, + "BitsPerParam": 4.52012387628928 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "lm_head.q_weight", + "shape": [ + 512, + 32000 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "2d09ff227ad5f5579aedb1df2c809ac7" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 28516288, + "records": [ + { + "name": "lm_head.q_scale", + "shape": [ + 128, + 32000 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 0 + }, + { + "name": "model.layers.30.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 8192000 + }, + { + "name": "model.layers.30.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 8200192 + }, + { + "name": "model.layers.30.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 13115392 + }, + { + "name": "model.layers.30.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 13729792 + }, + { + "name": "model.layers.30.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 26873344 + } + ], + "md5sum": "e9ecf8649ea7a4c2398b5144ec8844b7" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 20362752, + "records": [ + { + "name": "model.layers.30.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 13209600 + }, + { + "name": "model.layers.30.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 14860800 + }, + { + "name": "model.layers.30.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 19751424 + } + ], + "md5sum": "4445a6b9cbf73c648e569debc14bc8d8" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 31812608, + "records": [ + { + "name": "model.layers.30.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 0 + }, + { + "name": "model.layers.30.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 13209600 + }, + { + "name": "model.layers.30.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 14860800 + }, + { + "name": "model.layers.30.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 19751424 + }, + { + "name": "model.layers.30.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20362752 + }, + { + "name": "model.layers.30.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 20370944 + }, + { + "name": "model.layers.30.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 23778816 + }, + { + "name": "model.layers.30.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 24204800 + }, + { + "name": "model.layers.30.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 27559424 + }, + { + "name": "model.layers.30.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 27978752 + }, + { + "name": "model.layers.30.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 31386624 + } + ], + "md5sum": "ba37f225fd1ad35a93013e8053821d3e" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 20693504, + "records": [ + { + "name": "model.layers.30.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 0 + }, + { + "name": "model.layers.30.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 3354624 + }, + { + "name": "model.layers.30.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3773952 + }, + { + "name": "model.layers.30.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7128576 + }, + { + "name": "model.layers.30.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7547904 + }, + { + "name": "model.layers.30.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 10955776 + }, + { + "name": "model.layers.30.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11381760 + }, + { + "name": "model.layers.30.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14736384 + }, + { + "name": "model.layers.31.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 15155712 + }, + { + "name": "model.layers.31.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 15163904 + }, + { + "name": "model.layers.31.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 20079104 + } + ], + "md5sum": "f32dcf1d70da85f4aeb1f7d42efedec6" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.31.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.31.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.31.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "1c1dd9a3edf58736b4f9ca80de88cdbf" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.31.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.31.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.31.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.31.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.31.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.31.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.31.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.31.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.31.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.31.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.31.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "40a0f35dd928942bf945b9c2974306a0" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 65536000, + "records": [ + { + "name": "model.embed_tokens.q_weight", + "shape": [ + 32000, + 512 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 65536000, + "byteOffset": 0 + } + ], + "md5sum": "16494f75246325a9be7dcf30f5fea76f" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 31031808, + "records": [ + { + "name": "model.layers.31.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.31.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.31.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.31.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.31.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.31.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.31.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.31.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.31.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.31.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.31.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.31.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.norm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.embed_tokens.q_scale", + "shape": [ + 32000, + 128 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192000, + "byteOffset": 22831616 + }, + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 31023616 + } + ], + "md5sum": "12daa5381e138e9cfd471fc86a9cef90" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 33525696, + "records": [ + { + "name": "model.layers.0.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 4915200 + }, + { + "name": "model.layers.0.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 5529600 + }, + { + "name": "model.layers.0.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 18673152 + }, + { + "name": "model.layers.0.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 20316096 + } + ], + "md5sum": "49b3eb92dfb29d04992a3c19c97a86c8" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 31357952, + "records": [ + { + "name": "model.layers.0.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 0 + }, + { + "name": "model.layers.0.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 1651200 + }, + { + "name": "model.layers.0.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 6541824 + }, + { + "name": "model.layers.0.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 7153152 + }, + { + "name": "model.layers.0.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 20362752 + }, + { + "name": "model.layers.0.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 22013952 + }, + { + "name": "model.layers.0.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 26904576 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 27515904 + }, + { + "name": "model.layers.0.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 27524096 + }, + { + "name": "model.layers.0.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 30931968 + } + ], + "md5sum": "b61185745c0c207dc6965b85e9c0760c" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 32135168, + "records": [ + { + "name": "model.layers.0.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 0 + }, + { + "name": "model.layers.0.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 3354624 + }, + { + "name": "model.layers.0.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 3773952 + }, + { + "name": "model.layers.0.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 7181824 + }, + { + "name": "model.layers.0.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 7607808 + }, + { + "name": "model.layers.0.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 10962432 + }, + { + "name": "model.layers.0.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 11381760 + }, + { + "name": "model.layers.0.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 14789632 + }, + { + "name": "model.layers.0.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 15215616 + }, + { + "name": "model.layers.0.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 18570240 + }, + { + "name": "model.layers.0.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 18989568 + }, + { + "name": "model.layers.0.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 22397440 + }, + { + "name": "model.layers.0.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 22823424 + }, + { + "name": "model.layers.0.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 26178048 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 26597376 + }, + { + "name": "model.layers.1.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 26605568 + }, + { + "name": "model.layers.1.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 31520768 + } + ], + "md5sum": "8e8e203ade60a9775f08559a33b75c2e" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.1.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.1.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.1.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "0d092e628f981fd39a153b4b2b2cbad3" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.1.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.1.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.1.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.1.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.1.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.1.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.1.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.1.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.1.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.1.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "1685b255b945980d046e47185e4e267c" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.1.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.1.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.1.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.1.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.1.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.1.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.1.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.1.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.1.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.1.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.1.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.1.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.10.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.10.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "d20112977cc4c7a22981decfdb5624e5" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.10.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.10.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.10.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "0899370cc05384449589d9c91b60d6d0" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.10.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.10.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.10.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.10.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.10.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.10.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.10.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.10.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.10.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.10.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "f1e457ba56b03a30a4ddc55bb4ad1717" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.10.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.10.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.10.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.10.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.10.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.10.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.10.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.10.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.10.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.10.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.10.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.10.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.11.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.11.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "70038a64d14f07409dd7289b525d4840" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.11.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.11.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.11.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "81049aac52d28a56be97e5258b140452" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.11.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.11.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.11.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.11.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.11.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.11.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.11.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.11.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.11.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.11.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "7f4bec5b43b830b19b7dda9265da6edc" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.11.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.11.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.11.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.11.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.11.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.11.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.11.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.11.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.11.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.11.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.11.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.11.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.12.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.12.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "1066a7d8ea7c299c1e8441c1e95139d7" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.12.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.12.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.12.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "3eb2ff46424914ef00b61e49a25a8e80" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.12.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.12.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.12.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.12.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.12.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.12.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.12.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.12.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.12.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.12.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "a8b92836e5c7076172616b3c5710813f" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.12.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.12.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.12.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.12.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.12.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.12.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.12.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.12.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.12.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.12.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.12.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.12.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.13.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.13.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "07a1bd54fcafa13d192d8b46630c71af" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.13.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.13.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.13.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "8c14ec568e8614f1b0bfc01a264cc51f" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.13.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.13.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.13.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.13.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.13.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.13.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.13.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.13.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.13.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.13.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "347999e785f8bde7ca2bee632cf483ef" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 28353024, + "records": [ + { + "name": "model.layers.13.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.13.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.13.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.13.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.13.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.13.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.13.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.13.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.13.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.13.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.13.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.13.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.14.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22823424 + }, + { + "name": "model.layers.14.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27738624 + } + ], + "md5sum": "9393c92a4b05bcfd235b8e17b07bf0dc" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 31804416, + "records": [ + { + "name": "model.layers.14.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 13209600 + }, + { + "name": "model.layers.14.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 14860800 + }, + { + "name": "model.layers.14.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 19751424 + }, + { + "name": "model.layers.14.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 20362752 + }, + { + "name": "model.layers.14.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 23770624 + }, + { + "name": "model.layers.14.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 24196608 + }, + { + "name": "model.layers.14.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 27551232 + }, + { + "name": "model.layers.14.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 27970560 + }, + { + "name": "model.layers.14.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 31378432 + } + ], + "md5sum": "bdc7e1c5c6bc6e378a0d8781154eed92" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 24527360, + "records": [ + { + "name": "model.layers.14.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 0 + }, + { + "name": "model.layers.14.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 3354624 + }, + { + "name": "model.layers.14.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 3773952 + }, + { + "name": "model.layers.14.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 7181824 + }, + { + "name": "model.layers.14.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 7607808 + }, + { + "name": "model.layers.14.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 10962432 + }, + { + "name": "model.layers.14.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 11381760 + }, + { + "name": "model.layers.14.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 14789632 + }, + { + "name": "model.layers.14.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 15215616 + }, + { + "name": "model.layers.14.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 18570240 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18989568 + }, + { + "name": "model.layers.2.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 18997760 + }, + { + "name": "model.layers.2.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 23912960 + } + ], + "md5sum": "71cd12e98ed2fcdd6a6e8716808c6897" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.2.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.2.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.2.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "9e21b0776bbf46c410bad37ba30bccb3" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.2.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.2.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.2.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.2.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.2.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.2.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.2.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.2.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.2.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.2.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "65b86c55b99f25af636c35759e608ea3" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.2.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.2.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.2.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.2.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.2.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.2.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.2.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.2.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.2.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.2.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.2.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.2.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.3.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.3.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "bf93386dd2e54d53a0b178ea7b01dbc1" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.3.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.3.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.3.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "7ab3ea1fcf6c2b6985605fdd7fade21a" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.3.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.3.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.3.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.3.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.3.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.3.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.3.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.3.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.3.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.3.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "f325b84d2da030a6357ce1a5cd3135e4" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.3.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.3.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.3.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.3.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.3.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.3.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.3.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.3.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.3.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.3.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.3.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.3.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.4.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.4.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "390cc511628f498dc73b8b9deccc42af" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.4.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.4.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.4.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "1be69c1f7c96370c8e15a1587f47738d" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.4.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.4.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.4.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.4.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.4.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.4.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.4.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.4.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.4.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.4.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "797f263a6654e9c1e27fe93db223c2bf" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.4.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.4.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.4.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.4.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.4.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.4.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.4.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.4.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.4.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.4.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.4.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.4.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.5.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.5.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "bd458875c632a061413cd646ead84fd0" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.5.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.5.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.5.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "ee8717dd4514f33a330ffc32ec9ab010" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.5.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.5.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.5.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.5.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.5.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.5.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.5.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.5.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.5.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.5.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "3ca8d836450bd32346b502c64d240836" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.5.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.5.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.5.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.5.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.5.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.5.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.5.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.5.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.5.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.5.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.5.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.5.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.6.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.6.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "41ac3646f310cd803cbf496b44ee1b98" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.6.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.6.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.6.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "d08b37aa118fbd6e62d27920b0c8efa6" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.6.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.6.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.6.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.6.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.6.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.6.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.6.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.6.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.6.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.6.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "ffd5f3e40b818c6653cbb3279d808167" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.6.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.6.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.6.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.6.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.6.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.6.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.6.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.6.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.6.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.6.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.6.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.6.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.7.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.7.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "95660ca5088bff34db1dbd2d7d83dd49" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.7.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.7.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.7.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "6042fe8e6bce060db74c738292895c0a" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.7.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.7.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.7.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.7.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.7.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.7.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.7.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.7.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.7.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.7.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "eed0c34ec21d0e55244050f55e1d6825" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.7.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.7.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.7.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.7.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.7.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.7.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.7.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.7.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.7.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.7.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.7.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.7.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.8.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.8.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "8e15c22a4b525884028bb94380dcb415" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.8.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.8.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.8.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "523adca410c4d2f0d008dee8ef29d4a3" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.8.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.8.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.8.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.8.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.8.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.8.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.8.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.8.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.8.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.8.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "94674b8b0cb9f6d133f2ce1429472ee1" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.8.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.8.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.8.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.8.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.8.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.8.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.8.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.8.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.8.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.8.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.8.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.8.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.9.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.9.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "c924fd17898ac69620fcd33789b8c460" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.9.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.9.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.9.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "c4d6deaac622064aeaa1dd742ed5666b" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.9.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.9.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.9.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.9.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.9.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.9.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.9.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.9.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.9.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.9.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "bf8627d5ef13e90bcbc948cdd8f33659" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 22831616, + "records": [ + { + "name": "model.layers.9.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.9.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.9.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.9.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.9.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.9.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.9.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.9.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.9.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.9.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.9.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.9.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + } + ], + "md5sum": "688b1b11440ff72ca9434fa739017578" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.14.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.14.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.14.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "2d32a4290b699da359505bd1496839aa" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 25834432, + "records": [ + { + "name": "model.layers.14.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.14.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5501952 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 5510144 + }, + { + "name": "model.layers.15.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 5518336 + }, + { + "name": "model.layers.15.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 10433536 + }, + { + "name": "model.layers.15.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 11047936 + }, + { + "name": "model.layers.15.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 24191488 + } + ], + "md5sum": "fda4aaf5e3d62ff97fa88f609112f7c2" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 20362752, + "records": [ + { + "name": "model.layers.15.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 13209600 + }, + { + "name": "model.layers.15.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 14860800 + }, + { + "name": "model.layers.15.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 19751424 + } + ], + "md5sum": "2eb91ce147e1ed8a56c77a5826edc3a2" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 31812608, + "records": [ + { + "name": "model.layers.15.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 0 + }, + { + "name": "model.layers.15.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 13209600 + }, + { + "name": "model.layers.15.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 14860800 + }, + { + "name": "model.layers.15.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 19751424 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 20362752 + }, + { + "name": "model.layers.15.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 20370944 + }, + { + "name": "model.layers.15.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 23778816 + }, + { + "name": "model.layers.15.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 24204800 + }, + { + "name": "model.layers.15.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 27559424 + }, + { + "name": "model.layers.15.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 27978752 + }, + { + "name": "model.layers.15.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 31386624 + } + ], + "md5sum": "65eeabe570b7cae849d39c1d1564fca6" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 24527360, + "records": [ + { + "name": "model.layers.15.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 0 + }, + { + "name": "model.layers.15.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 3354624 + }, + { + "name": "model.layers.15.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 3773952 + }, + { + "name": "model.layers.15.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 7181824 + }, + { + "name": "model.layers.15.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 7607808 + }, + { + "name": "model.layers.15.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 10962432 + }, + { + "name": "model.layers.15.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 11381760 + }, + { + "name": "model.layers.15.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 14789632 + }, + { + "name": "model.layers.15.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 15215616 + }, + { + "name": "model.layers.15.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 18570240 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 18989568 + }, + { + "name": "model.layers.16.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 18997760 + }, + { + "name": "model.layers.16.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 23912960 + } + ], + "md5sum": "b2872048cfb7806c5fab28ffcac07318" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.16.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.16.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.16.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "8fb5556ef6aa721ae50ba8ff65ab576c" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.16.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.16.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.16.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.16.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.16.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.16.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.16.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.16.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.16.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.16.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "cf7451b6a6e4751a5453294fc0598591" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.16.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.16.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.16.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.16.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.16.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.16.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.16.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.16.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.16.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.16.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.16.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.16.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.17.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.17.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "10e5afc77c4018de595ddbf9a70511ce" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.17.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.17.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.17.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "573a6fab79fdc11f54541ce258f70683" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.17.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.17.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.17.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.17.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.17.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.17.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.17.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.17.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.17.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.17.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "22f0d64cdbf76682ecbc7c8031ee9d8b" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.17.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.17.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.17.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.17.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.17.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.17.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.17.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.17.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.17.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.17.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.17.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.17.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.18.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.18.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "293613e3a385a9001f71453af21af8d4" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.18.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.18.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.18.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "c46433ae33581cdc14490721f498277c" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.18.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.18.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.18.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.18.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.18.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.18.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.18.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.18.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.18.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.18.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "a70da3e72f9524f3287c56e1c10d4d8d" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.18.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.18.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.18.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.18.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.18.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.18.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.18.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.18.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.18.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.18.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.18.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.18.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.19.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.19.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "ce7ce486f0427668fdf93a7d07edbff7" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.19.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.19.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.19.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "7981efdaeec356fca7dfddd94c948a60" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.19.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.19.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.19.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.19.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.19.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.19.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.19.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.19.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.19.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.19.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "cb33d6a8508150571ec5e14cff50578c" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.19.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.19.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.19.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.19.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.19.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.19.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.19.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.19.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.19.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.19.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.19.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.19.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.20.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.20.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "53a1f97405420ce6d8c8b40777aa82d0" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.20.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.20.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.20.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "aa6f87871c402cfaaeee7513cb133426" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.20.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.20.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.20.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.20.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.20.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.20.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.20.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.20.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.20.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.20.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "b31c8b058ff7c59914afef0ed28a6830" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.20.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.20.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.20.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.20.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.20.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.20.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.20.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.20.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.20.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.20.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.20.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.20.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.21.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.21.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "791b4613b0f440ff20e2fa36ebe9d552" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.21.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.21.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.21.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "aa7884001c69a17446f78c872d2aa67f" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.21.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.21.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.21.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.21.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.21.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.21.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.21.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.21.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.21.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.21.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "25d15907fd704bed06298fbd0d1370f5" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.21.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.21.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.21.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.21.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.21.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.21.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.21.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.21.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.21.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.21.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.21.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.21.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.22.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.22.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "761cace575fbb4be1d8991e3fa7e230a" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.22.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.22.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.22.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "d50d01a3ce677611610db57249fb825b" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.22.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.22.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.22.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.22.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.22.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.22.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.22.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.22.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.22.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.22.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "96773f5f79393eb3353820cfa75f4017" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.22.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.22.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.22.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.22.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.22.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.22.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.22.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.22.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.22.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.22.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.22.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.22.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.23.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.23.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "d6c1d79ad0f7f7d7914353f8dff61271" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.23.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.23.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.23.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "d7cc24e762b0924ff58ddc2877d83419" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.23.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.23.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.23.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.23.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.23.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.23.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.23.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.23.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.23.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.23.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "6caf256a64ec007840f443934547a395" + }, + { + "dataPath": "params_shard_81.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.23.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.23.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.23.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.23.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.23.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.23.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.23.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.23.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.23.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.23.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.23.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.23.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.24.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.24.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.24.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "acbee8493bda9cc9f2084f7e9c07f242" + }, + { + "dataPath": "params_shard_82.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.24.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.24.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.24.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "73f15b7032620e49f8936ebc3f008d4a" + }, + { + "dataPath": "params_shard_83.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.24.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.24.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.24.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.24.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.24.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.24.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.24.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.24.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.24.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.24.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.24.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "b91fb9cd5249e20c99e270d9f8c3e52e" + }, + { + "dataPath": "params_shard_84.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.24.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.24.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.24.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.24.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.24.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.24.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.24.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.24.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.24.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.24.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.24.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.24.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.25.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.25.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.25.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "1ffc5bc9e66ca3520afcc65d3b22fc02" + }, + { + "dataPath": "params_shard_85.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.25.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.25.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.25.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "e6b8c7166f4e28a9a0ccb10643b3a3e2" + }, + { + "dataPath": "params_shard_86.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.25.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.25.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.25.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.25.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.25.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.25.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.25.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.25.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.25.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.25.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.25.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "8652e88f6fdc477798dc7c4411944437" + }, + { + "dataPath": "params_shard_87.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.25.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.25.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.25.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.25.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.25.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.25.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.25.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.25.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.25.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.25.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.25.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.25.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.26.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.26.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.26.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "173811a0ea69d2e0d7404e0a7c8a1b03" + }, + { + "dataPath": "params_shard_88.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.26.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.26.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.26.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "3ebf10d5f11fdb2310d56911fe4db39d" + }, + { + "dataPath": "params_shard_89.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.26.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.26.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.26.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.26.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.26.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.26.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.26.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.26.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.26.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.26.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.26.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "d957fd1965331a2969745b7bed77db64" + }, + { + "dataPath": "params_shard_90.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.26.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.26.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.26.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.26.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.26.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.26.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.26.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.26.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.26.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.26.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.26.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.26.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.27.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.27.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.27.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "37ab5c57049e2c53ef664981b6c6fff8" + }, + { + "dataPath": "params_shard_91.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.27.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.27.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.27.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "93cea88fb9e251c8c53f4d0b0e738d90" + }, + { + "dataPath": "params_shard_92.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.27.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.27.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.27.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.27.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.27.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.27.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.27.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.27.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.27.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.27.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.27.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "d21ddc726a375c06206551b8c610fd3d" + }, + { + "dataPath": "params_shard_93.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.27.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.27.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.27.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.27.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.27.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.27.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.27.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.27.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.27.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.27.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.27.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.27.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.28.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.28.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.28.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "89dec68a1f6f702118c8c960e7c3afb1" + }, + { + "dataPath": "params_shard_94.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.28.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.28.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.28.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "79363491a4d3cb15ecb94079e5fb8eae" + }, + { + "dataPath": "params_shard_95.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.28.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.28.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.28.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.28.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.28.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.28.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.28.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.28.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.28.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.28.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.28.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "66ea4f7b0786057c6e691f5c47f24cb5" + }, + { + "dataPath": "params_shard_96.bin", + "format": "raw-shard", + "nbytes": 28361216, + "records": [ + { + "name": "model.layers.28.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.28.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.28.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.28.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.28.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.28.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.28.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.28.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.28.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.28.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.28.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.28.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.29.input_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 22823424 + }, + { + "name": "model.layers.29.mlp.down_u_proj.q_weight", + "shape": [ + 300, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4915200, + "byteOffset": 22831616 + }, + { + "name": "model.layers.29.mlp.down_u_proj.q_scale", + "shape": [ + 75, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 614400, + "byteOffset": 27746816 + } + ], + "md5sum": "8ea4888a93ff2decaeaa4682907359d2" + }, + { + "dataPath": "params_shard_97.bin", + "format": "raw-shard", + "nbytes": 29647296, + "records": [ + { + "name": "model.layers.29.mlp.down_v_proj.q_weight", + "shape": [ + 1376, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13143552, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.down_v_proj.q_scale", + "shape": [ + 344, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1642944, + "byteOffset": 13143552 + }, + { + "name": "model.layers.29.mlp.gate_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 14786496 + }, + { + "name": "model.layers.29.mlp.gate_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 27996096 + } + ], + "md5sum": "b7b7f0468cca27ef3c135fac00c20623" + }, + { + "dataPath": "params_shard_98.bin", + "format": "raw-shard", + "nbytes": 33480704, + "records": [ + { + "name": "model.layers.29.mlp.gate_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 0 + }, + { + "name": "model.layers.29.mlp.gate_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 4890624 + }, + { + "name": "model.layers.29.mlp.up_u_proj.q_weight", + "shape": [ + 300, + 11008 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 13209600, + "byteOffset": 5501952 + }, + { + "name": "model.layers.29.mlp.up_u_proj.q_scale", + "shape": [ + 75, + 11008 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 1651200, + "byteOffset": 18711552 + }, + { + "name": "model.layers.29.mlp.up_v_proj.q_weight", + "shape": [ + 512, + 2388 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 4890624, + "byteOffset": 20362752 + }, + { + "name": "model.layers.29.mlp.up_v_proj.q_scale", + "shape": [ + 128, + 2388 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 611328, + "byteOffset": 25253376 + }, + { + "name": "model.layers.29.post_attention_layernorm.weight", + "shape": [ + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8192, + "byteOffset": 25864704 + }, + { + "name": "model.layers.29.self_attn.k_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 25872896 + }, + { + "name": "model.layers.29.self_attn.k_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 29280768 + }, + { + "name": "model.layers.29.self_attn.k_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 29706752 + }, + { + "name": "model.layers.29.self_attn.k_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 33061376 + } + ], + "md5sum": "fb0ce9ed719c7a1a0ac4cce22ab8e4c1" + }, + { + "dataPath": "params_shard_99.bin", + "format": "raw-shard", + "nbytes": 26657280, + "records": [ + { + "name": "model.layers.29.self_attn.o_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 0 + }, + { + "name": "model.layers.29.self_attn.o_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 3407872 + }, + { + "name": "model.layers.29.self_attn.o_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 3833856 + }, + { + "name": "model.layers.29.self_attn.o_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 7188480 + }, + { + "name": "model.layers.29.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 7607808 + }, + { + "name": "model.layers.29.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 11015680 + }, + { + "name": "model.layers.29.self_attn.q_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 11441664 + }, + { + "name": "model.layers.29.self_attn.q_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 14796288 + }, + { + "name": "model.layers.29.self_attn.v_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 15215616 + }, + { + "name": "model.layers.29.self_attn.v_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 18623488 + }, + { + "name": "model.layers.29.self_attn.v_v_proj.q_weight", + "shape": [ + 512, + 1638 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3354624, + "byteOffset": 19049472 + }, + { + "name": "model.layers.29.self_attn.v_v_proj.q_scale", + "shape": [ + 128, + 1638 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 419328, + "byteOffset": 22404096 + }, + { + "name": "model.layers.30.self_attn.q_u_proj.q_weight", + "shape": [ + 208, + 4096 + ], + "dtype": "uint32", + "format": "f32-to-bf16", + "nbytes": 3407872, + "byteOffset": 22823424 + }, + { + "name": "model.layers.30.self_attn.q_u_proj.q_scale", + "shape": [ + 52, + 4096 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 425984, + "byteOffset": 26231296 + } + ], + "md5sum": "1201aec368ae63306799e79579796724" + } + ] +} \ No newline at end of file