{ "metadata": { "ParamSize": 805, "ParamBytes": 44100517888.0, "BitsPerParam": 5.000504730733063 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "3eb26648b5f558c692400dae6a251733" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "6614898e29505f2d0e0babc3a8f5a8ad" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "efdd06442457ffd780ddd35f931a09d4" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d8850ad695e7afbe4ab9d9c117f849ad" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fbee9a834a04271177eec1f8ec61fce3" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "34679a78732edb94cf53ae1791feb1a8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "23b299ecfc24900893b39ceaa140ad8b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f3e2de86ad9b3da11898710282258c48" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "55d0aac8c1a5961e4fc3c16d7f2e9096" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29392896, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 5242880 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 9437184 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24117248 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 24133632 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 24150016 } ], "md5sum": "00090d155aaa516834e659a64d9c70d3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2b62e7c2749ac6970811a8e26a3ba788" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f12ce6badf344ad78598c1ffd61c69f9" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "464d55f11ee4c81019703dd9ebb76ad2" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c4348309d8bf34e3064082ef7a9b02d9" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8326a3e3650ee0eee56cb611cbb1cc39" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "267dc9ee7792f6eba8dfd6bc41eba9dd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "adeba438497ca117f77e292d2191524b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "63932f5deb9b9a4f22e88bb9d1a759b9" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 28344320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 4194304 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18874368 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 18890752 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 18907136 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 24150016 } ], "md5sum": "08fba2c7c7769ed2924abc240b825f34" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "afbdabfc265864a6858cb77206425f10" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "86b98ed8cc26327aa77182790730613d" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6e416beba5ca69428be3d878dd52258a" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "629054b59c411ca27f304bde0ca8ec11" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "eed27800592c2b6f7ace199ba5277e5d" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "aef1bcf12f8a17012a532a62008ae498" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e74b7bb95f07d25fee278a1c21250651" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fba5302eb11121acbd0d212619577572" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "914346101f63ff7a22c27e7f8c5fcec9" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5986b1ee36dd2609da5f3a4b26cbab55" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7f19408534c249089ef6d1b7fc662f2c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "e991852788848ffb70815d67338aed33" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f4a2a3f013d2cb2c2ed981628976f0b0" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ce2f608967b1b6e9e5e196006cd04c4f" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "be3019ccd048a09d6a567f616c63b275" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "995cd73c27ccbd925f2b994d6b9c7425" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6f9ee3dc0c868c97e03204426539e9ec" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "8e2d217533fb9e5c82164836fa466b6f" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f49b1801eb1e95ee0bd6d5e7b390e1c1" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "278fe924ac8163c001173ef539c7b790" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "35645356d8bd6a6b060d2b9c71423c98" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a28490fde27c05d20ff6b923197868a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "85a080cd0153c1226cd9480f63cf6444" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "1e502e5af50c73f26cffa06794804a8c" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c12e6371a022cfdf76948e7f5e07a32" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a26ba7c58090a1d5f212da938297685d" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c0028141ffb7af45b7b52767d7572342" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8a3c7d689e4e1ce6165cf306ed0e8e6f" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4b8ef534bc5d03540808c00c50b3694f" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "8740ca9961626aeac52dedb3aefa8c3f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "318f8c4417d7771cf96a6fc4f74db8e7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "780e1b9d580672748486bb92723f1d00" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "79d8a47372051b23ffcc074fcf91ec1b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b001d8dd735c18e80f18d029f8a9dfe4" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "88b11385c9d3f97f8136d748d98af6ee" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "6b5ea17d47d1241c525ef7d300ccfdce" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e38a758a29467975b51ea8a471d96120" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3e588c86fcc28d1447c5c2a8b88822bf" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f01e793ac96b95b397225fac1e6966e3" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7ec5780c491a793bef7a298d9b47b097" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8e8a879b80002d2db5864dca65654bfe" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "e011c4fc71dbe9c4fe180ea51d500393" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3d736066762d2bc7ddcbd6766b0b4a47" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d6d150533592a006b57fd48555bf1793" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8a157ca2a7fe7dcc73234acb3f29c815" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45eefd20c8ebff790773148a1df27970" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9df9c9a5c5509de08a813b8c2925f848" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "5bc22833ebccfdb544225a82585b1a9e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ede843d9e31f6611379dc25d0a03b2a5" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3036941cd629429c0b1b5d8f8fcb6198" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "261470f35c4ebf6e6ec67b38ea48d97f" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "259914fec9efdedc15d2f21851c58a87" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0de3b04c86368247ab7bb63fd8134265" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "ea0d86fe14ee766d8fdf54bc79890ee6" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "c1fa9ec6e14b644902fac961e6199da6" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "59856ad446fb398eec944a29fdbce8ee" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "435ec5f2733708e8c94e0a73e26cf098" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "900ee62734deb08982ad0c83cdf0703b" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "179e23f31518b761ebfd29e57e145dd3" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "dc21b4f0703c8e641421c191df3b87b2" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ad1e7a7d841152768bba079f7563a9b3" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8ba48f1bfc5874ad31e4a7c950bcc59d" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "11ecef06e3163b47fafaf626dad32a32" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dbddebd2c2708539b0a7c3ad1234bd2f" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "279584698884b03ad15c20b493cba4e8" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "3b8a367ebebe083efc8a2e17fc4a819a" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a999c4acefb580d544c449eb51f6588b" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "42c432c0529e40df61f72182d2bca4fc" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "012d88f7356ecd458a4dfa3c974ab2a7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "af4e465210e4e584b2704f7adca422ad" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "94fd31634cba092e8dfe3be95620dc0b" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "db5465ca370160ef1c9094791bed386a" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2cf581a6b4c0fd1519d107309e3fb5c4" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "292823c7db6620c7a9e274b3f605ebb4" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6f5c08b22ca93f45e2012efa861e586b" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "51cb39911a379d88e1c0de84a2b0b43b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9611a3024ce88c69b95e34b241e22a67" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "0eaabf9beb5bbdc9bd46a0d62aa1c443" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "343784019e030443b34d06730510da34" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "fb11603e3b3ecba3c3164b5cd0f40c04" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c268018e057189582206c515fd66157f" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fe6cc2707a9b4de91104c873ee47ebc3" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d92c13cb2ce5a0bb961b9f32d7f85452" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "d4e670a27a514c4f074fe6419af4e907" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8c420ef4da23f5b1bffa0d3544fa1f76" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a6633e9e16c5576c93b44ef8213a5f8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9d62baebfeb51a723b945abb8604d211" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "48b7f2b7814e22a3543523d955acb68d" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "68d4697910406651d71b0f3162bb6b98" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "bd0d3f39b60911f1f78322f415737454" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1417adb69974e8c6b3802a23ce782627" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "1dd528fdf6e0eae9939eb7fdfd4d1c16" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "20136b10a5376e92c58a7b42cd37baf1" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "163fe6673f6262094e028c589fcebf8d" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "63e3dbe14f2faeb8120e90baae253245" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "299431a3094815ea7546661af24f19b6" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "793b305e1045d4d53f9513e1f3e6b940" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0ad612a786ca67543a7454602d72b5d8" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5a922fb3c0503eccfd7e100b879acedc" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8ecd2e5512cc98722f01a0c538876a87" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "72552d30d13a19426acb7b4732540841" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "54c92979854fb480f6c99da98f34554d" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9f3e49492661d523d8b12896c3472a48" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dc90d74b7fb51dd9de52ae7c2b0403e7" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0fdade6cb70a87de2dc82edb93c860b4" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "82d20d0be1c6ec3cbf0ac229e491fc6f" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9f889371e4ef6bc2f5472ea87e182f66" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "0f4e5929575191b98a434b8ebeb87923" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "88d2dae990cf124e47c4cbec08922c0f" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "61a3773fa5103d8d9af46c6ddd771ed2" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f815a46f52c2cf2d5b10534b692a7084" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fca70d337a5e874a836cb59b93f6dd07" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "39008841620c74ca1aef4ac58cb91898" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "18f1ee4ef2f0edb4d4093ba63343661a" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "dce00e7be553167dbfe25733e7272004" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8a31d79d41f5c0516010708715c1d197" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3ddca491c7f32720f4e0fde60b01867c" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0510fb8fc6ba6aa988fff213625bbf2e" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0c9a1a8cd6f9efa89ff72b785eba6cb3" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "95a0251da5c59a1488f7bab7c9437c8c" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "51ecac71550791e32b663c50a712b74c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e11e3bbe5504cbb61ed1ef1b45d285a2" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c42412a1bcf6316a397122710b6aaed9" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ebff967a51662bfef80207925fdb5fe5" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "012df5ba5975d1d67e2ea01e35d64b74" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "df3fd3bce358f5512660b767a2f0d470" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a004d23951ff9e6f46a096438cbfb525" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "752af7daac637ac980fdf8a8242402af" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4608b218b2d2147ce9dae5c2e8c80844" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3e61e1e3b29b53f76eaadf73ac9b1bed" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "336b672f32392ad02a2915a0217638ae" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "eb6438744630c9dba1833260312b6916" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9e0b6a7ef340b452038b6a068f150282" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "eabf7be6e36f92b6c1d0b357121b6876" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "59b910ba6202389ccaacd5a42090f307" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "50efb68839c84863b00532e8d64b608e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "398cd429fd799a93768d33504916081c" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "efe9e993dccb9d68e06dab611bfc36b0" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "03bc60d040f94d59f9234d20dee210f0" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "427634e75a9241cc3f854808b51104c0" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6d3d6e46e1573a131d052d4a8a809933" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "86cc6ce7494fdcc87dedc08e61666fd9" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7acd46d2ec176557e6a923a1f71dba55" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "1b0ca4a2736271af9deeb85f3785f673" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e0395aeaecf379d4999b517de047123e" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2dc39db68b5b2dfefc1679917778fd09" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "992a5605cc94fbaf37a512cda0b9efcf" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e98aeeb02e5c206e84969be9c48854b6" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7d4b4516f376412d32abd1013606fae2" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "a2807bc5935eb942e1817c8fe1b8322f" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d819779db0bde9fa3c1a70f55491c61d" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4d3d8819159487ae524710f6a55ef9d2" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "618eb3aeb92661136270d8ec97b0cf4f" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d43fe48134e25129b33918d3a26d5682" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c309b9e8f3b033cdcd66f6ee8db4473c" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "94af18c8b8fe4d27e28c9c52ad63c5ca" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1da157f2f03c353d2a8582b0d03ad768" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5c4557746e06e32ce52b56d75f0ad06c" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fff58269ba29baebbc874b9c6c4b5403" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d6122af64356560bc287c1125185d663" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "37a369325d9983fa3ed0d16de0c0c558" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "2c0425710d04da33f8b835837eae4f20" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e458229df6e5ecdb10f5f2383ee8f4e2" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7e9041ba5738626f2363819d66f5fdcf" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca86004b746ebb1f10566cfc6a704212" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3f8d5aa4098028454ed8f9f2b930bfee" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e2a75e74802eab2a18541c1abf68e99b" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "9f9a3353e559a79896d1ee4e624fcf60" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7f0da08dbff64b51d6a83a42d6d6283f" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5ece3079612afeac7e4f0c3f3cb6494b" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e33a1365da838d45459f80f103e9fc37" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6552ce348c8a047340e42eb3231ecd73" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4d326a209b36ecaac84a9555afec375b" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "3e278836a2028f736eca4befe008e448" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "23d4cfe104b485aeda3174d1cea8c6cc" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f914c1e208e39d6e642a57c1ab2afd10" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "860b61044b798014c357ff6f10d9beb3" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "06fe33bae3deb07dfaeff1634ead9d06" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "57a2edaf53dd318c2dd12130c8f188ae" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "f475fcad33025680e317391b2e2b2c6c" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ae993c62edff8bf94208677c0d49a922" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a3c358b2fe0cac2c3b5a6f9cc5ab5ff0" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3929388efbdda78ae51cb20caba9e8f4" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "aa67735df41736918e8f5feb78f795d3" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cf1265578a5f4cda15619d20aa083162" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "cb0a72057d3ad00f41f88f60717674ff" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c11f4d93900befbc0ee0e1521f525a6" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "62523fc73cdb0b835ed06b69541622bf" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "126dae4364f9217715e68722ecd0c348" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "278a6d3012d2ffe18d6ff3de86aae1ee" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "96a7f432c31c04dbf1a18ecd9227fc7b" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "512877b6be9a639179cb63c69f06a1cd" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bc43df6ca930bf4275f1c70aa4802e07" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "cefa27e004118023dc49e2ef4ae9b669" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3f76e710be6bf2972494cd7d022d6e12" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b139a50aad55fc83f6394eeeb29c6f52" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e29ff26cf3a9bcc5da4844e46db7536f" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "a8b3748269d3f95c1d1370e8a8ebdd50" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "bed782fd09e45c347ad559e8e5072ffa" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0cfb21936fd9b599c845b13bd075d9ea" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2725974f1510c1d403354c7c179cc4d1" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8becd3bddf39d86a2c6620f4ab7ac2f3" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b1cb0cbadbb441b3f480f943bafda8e2" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "c0bec3ec5fb95c5a1e474c7e664b8f33" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62bd8722614fcb6a85e8acf22c7aabed" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "c748c30de74131e6f2b06724bcc4ce8e" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "819a332430157c13e368c8c4a1285c9f" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "13e27af78e462080f98a7c6772b77b50" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9040556dbb3661c7ba114df8368414cd" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "725b7c787d7a55cc13d88444c6778899" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "61ac7247c58c2aac5e656029d855a07a" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e1a9725b87546d033d43533ffa38f804" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "284d97aa60d5afc1dfd06139a2b28f01" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "17f81c61ca1da9e1df7db1b736ee82c7" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "79388d186722bf3a3ec58b1056da0b2b" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "37c6c9f538b80b2baa78a7a3896bdcd4" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e7a9e544367ca01a82bbe95427961fd0" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9e6e53aeb67809933ea41048aad080be" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5af9f3f393c4bd1c8f37631e2c488d32" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a38d3c2f0192991b9b38506bd7f3f106" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4eceb0073e3057b069134f9eff7945df" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "e68d3c7895452981dd0518a9b44ea776" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.40.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "ea6d86343e1855258dc182a800a4d2b5" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0aa61fbea8a0e31e416deef0c16773b8" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d778a8241c6a321827742aee3f35b12c" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "15e031314915146ad0e5c014b3de5987" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e4045c6c5758dadce200c66ad7caedf9" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.40.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "7278f0271d5dd479d62f1c3439eca83f" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.41.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "7dc217df3c8cc97a3744962d5acee566" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f6f0e9975de5fa8c46628ade3b5484da" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c03306a51bfa6fe745b7d5d2f9732a97" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "822fed4f6b5538648ad0042d2841a23d" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e9fd0ec34ac9759a07281a2b1d8f25c7" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.41.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "b3fda6a1b99f53cb4ba29d9c514016e6" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.42.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2a7c46de7f9fd9bf79d08ccb8b53adc5" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2a835ca06c0839e8d6f5a96ed2bebfca" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "131d7d71345e3a0078520a1c463600b4" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a5a4a4eaab2c04e8c56024e99f4ec365" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b2451b322968cb0820b4c857937f39ee" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.42.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "af6878f0a029845d141bd54d41813582" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.43.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "374625e7f063378799667dd3d28791cc" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ae3e41a16e5cf31b113e35a2fdcb17cb" }, { "dataPath": "params_shard_261.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e28fd4dc34f1e77922e7711cb84bdb0d" }, { "dataPath": "params_shard_262.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "24d43470b2a61851f1d0aa724f65ba5b" }, { "dataPath": "params_shard_263.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "2b6758a378cc7c6a71d980813d50ac56" }, { "dataPath": "params_shard_264.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.43.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "4ca2411153668532408d64b41f844e36" }, { "dataPath": "params_shard_265.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.44.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2611f4a9cb926c71ada5888092ca0034" }, { "dataPath": "params_shard_266.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d3a98a8e146848847f848164aa9d0312" }, { "dataPath": "params_shard_267.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "ca9ada01e9a0540a3abc940199e16f53" }, { "dataPath": "params_shard_268.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ae823724cf8520b16cadc221c5ea97d9" }, { "dataPath": "params_shard_269.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "cd901a127dc79e2ed2a764715fbb018a" }, { "dataPath": "params_shard_270.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.44.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "d30ad2527e11a540d5ee02e044f28420" }, { "dataPath": "params_shard_271.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.45.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b19ebcd6bda0f55e1d4f0b376f91767a" }, { "dataPath": "params_shard_272.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "dfa8692ad9cd6ddb99cf4e1d496d2c69" }, { "dataPath": "params_shard_273.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "46fd1fcaf5489e7e2a10c78a6be74838" }, { "dataPath": "params_shard_274.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "adbe51be2b94e9629894b5804e0e9407" }, { "dataPath": "params_shard_275.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e57fb8307e7cf8fe2d8b831f8cf4975e" }, { "dataPath": "params_shard_276.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.45.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "05c499c5b9d362c3b877635ca39cfc21" }, { "dataPath": "params_shard_277.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.46.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "b6e5f4be3e833a985468ca67e993a1e5" }, { "dataPath": "params_shard_278.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f8fcffc0ba2a11858a2b2ebc79360761" }, { "dataPath": "params_shard_279.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "fe795e1ffe879aa8e87c2528531c60d8" }, { "dataPath": "params_shard_280.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a20e3cc24bb52d6f5b3bd9b99fb1f7b9" }, { "dataPath": "params_shard_281.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f2ac52a6590269469fd50fcf884cd508" }, { "dataPath": "params_shard_282.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.46.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "5ac56d3c461228107ebee3911676919d" }, { "dataPath": "params_shard_283.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.47.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "663c3f34e89b5b517bb381fbfe7557ac" }, { "dataPath": "params_shard_284.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d4982bd9762ec3aa8d6fa40208c143dc" }, { "dataPath": "params_shard_285.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d5c0b8f0e14b6d20379753bbeb29ea36" }, { "dataPath": "params_shard_286.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a29cbee9a62d505d7a6ae1e9593e7ed1" }, { "dataPath": "params_shard_287.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ac4b080929fc92ec0e9a6d2066712e61" }, { "dataPath": "params_shard_288.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.47.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "438f04bd27d7883939b2822cd82fc9f4" }, { "dataPath": "params_shard_289.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.48.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9597f604ee79e59e3233da7d887cc667" }, { "dataPath": "params_shard_290.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "25c626e13186bfb968a027b8ad75669f" }, { "dataPath": "params_shard_291.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9aa82634e22ce3364646ff4ee282843c" }, { "dataPath": "params_shard_292.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "16c081950fb0a203c7fe517221bac632" }, { "dataPath": "params_shard_293.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b75130a9426cc5878ca7adc37feb5d06" }, { "dataPath": "params_shard_294.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.48.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "67183418d9b0584f5709593da8b4067f" }, { "dataPath": "params_shard_295.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.49.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9889d460341c02d901966596bf262cdc" }, { "dataPath": "params_shard_296.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7c22439b8a2ff2cbbc205a820e68988e" }, { "dataPath": "params_shard_297.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "24bd92886bf03a9360245a7b415f8013" }, { "dataPath": "params_shard_298.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "07cc9229afe6edc077bdb3a397051d0d" }, { "dataPath": "params_shard_299.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4764653c765376f01e1f11148f024c00" }, { "dataPath": "params_shard_300.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.49.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "3a047aeb9c0455902eef0084424d05ef" }, { "dataPath": "params_shard_301.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.50.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e35827193c0dd5e8bba0c6d37d179b06" }, { "dataPath": "params_shard_302.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "37bdea87bd37d94a0af44cba9d6916ff" }, { "dataPath": "params_shard_303.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f5b3d8d72605fc3ba49ef5e458593a24" }, { "dataPath": "params_shard_304.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "87a7707a1f51ab39f09ee8da7701c9df" }, { "dataPath": "params_shard_305.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5ab4d4423853cd949d394c0a7dbc5e2e" }, { "dataPath": "params_shard_306.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.50.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "5008d6497bda2c5bacc357c4975d84dc" }, { "dataPath": "params_shard_307.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.51.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "898769e571cfbbe506662c5af71fd627" }, { "dataPath": "params_shard_308.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "67caa3dfc7f6df3c7bc411429a957034" }, { "dataPath": "params_shard_309.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "c12ba3baecf6319317e023fc4ea1236d" }, { "dataPath": "params_shard_310.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "642c08c68d1456774cddb62d0d1d37bc" }, { "dataPath": "params_shard_311.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "ce5207136300d3fe4a0ab9f5b523c136" }, { "dataPath": "params_shard_312.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.51.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "28e733c6c84e569f87697b2a7a48bfaf" }, { "dataPath": "params_shard_313.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.52.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "4e5987d3244c48b3aa0d46d1ceee2951" }, { "dataPath": "params_shard_314.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a173c5c565bc1892cfbfd258c1cb6289" }, { "dataPath": "params_shard_315.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "25394fc7a51a99827c101012ac3f5f19" }, { "dataPath": "params_shard_316.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b6f6ab4f9a40ec72cc039bb6840550bf" }, { "dataPath": "params_shard_317.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1bdc90db1498982bf164fb79785c876d" }, { "dataPath": "params_shard_318.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.52.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "0cc4e820530177a74513adb27afbd9c8" }, { "dataPath": "params_shard_319.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.53.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fd96917f49ea5f021c4c33d30bed32b4" }, { "dataPath": "params_shard_320.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "41c50491e9d060338cb630ab2ba88999" }, { "dataPath": "params_shard_321.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "bab4e1a1288bd5314754b46774495237" }, { "dataPath": "params_shard_322.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7c13baaa72e3f7916d895b1ec526977c" }, { "dataPath": "params_shard_323.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "19f2d96ab3750265f7f12328aae4b322" }, { "dataPath": "params_shard_324.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.53.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "01882d19d9896aa4f0e2f684bed12034" }, { "dataPath": "params_shard_325.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.54.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "8b466389d5e67a19cfaf0cbe6dd8ba37" }, { "dataPath": "params_shard_326.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6e8acffaf0096d5dfdb61a156a7fd4a1" }, { "dataPath": "params_shard_327.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3b953a0e2f17d9d48281e62021524013" }, { "dataPath": "params_shard_328.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9a1a10b192e1f02c27370661bce28fb8" }, { "dataPath": "params_shard_329.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "b2d3f3e3b1e39e0f79967860b7eb6a76" }, { "dataPath": "params_shard_330.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.54.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "774dc85439307a856b0fc4c7aeb20f18" }, { "dataPath": "params_shard_331.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.55.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e50be005424bc55d474087735ecc75e6" }, { "dataPath": "params_shard_332.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "3ccd3056e2ec39fec7e2252ebb7b5201" }, { "dataPath": "params_shard_333.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "894f23f697dbc67bff0d490bfe3c6ca8" }, { "dataPath": "params_shard_334.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a4794ec700952f23bc76220dd1d76ff4" }, { "dataPath": "params_shard_335.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "5e2c4c525807a481f26cf25492a2f479" }, { "dataPath": "params_shard_336.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.55.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "72d56789d090541ff4e311d0a2136035" }, { "dataPath": "params_shard_337.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.56.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a22693c144d25c14ea4c4878c8ef0c06" }, { "dataPath": "params_shard_338.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "63403d019ae94759c582efce72f9cc56" }, { "dataPath": "params_shard_339.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b20eb650134daeeca6eb08110c219595" }, { "dataPath": "params_shard_340.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8bfc35a9009de9225c3a12cb196e7954" }, { "dataPath": "params_shard_341.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a19d8cc746138a7b60884b207ad28ded" }, { "dataPath": "params_shard_342.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.56.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "0ee1f7a8f52a37163816027cd8631d7f" }, { "dataPath": "params_shard_343.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.57.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "507c3daa67d84b90e930c67fa52a15b5" }, { "dataPath": "params_shard_344.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "db67da53b6209bf7ff8448563296d6ef" }, { "dataPath": "params_shard_345.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "67e3c67bd2a84887ba16228e28dd0ac1" }, { "dataPath": "params_shard_346.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2f6fade64d07f3ca3fbb3d257a49535a" }, { "dataPath": "params_shard_347.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "16925bb2bfc6d13779512a252839c5b9" }, { "dataPath": "params_shard_348.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.57.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "e03fe4d8fb5783798a74f2b88bb22f6e" }, { "dataPath": "params_shard_349.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.58.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "9294f9b7199e1d8c3917ae043d9f5ff1" }, { "dataPath": "params_shard_350.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9fb78979622f2d2d1d60af9ebf040934" }, { "dataPath": "params_shard_351.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5439d9cc96994bb3978b2bedd1661a30" }, { "dataPath": "params_shard_352.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a1d1883a4127ea2ca97161a38e77357b" }, { "dataPath": "params_shard_353.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c8f52ab478c45d46bfe5c1dbcca973ea" }, { "dataPath": "params_shard_354.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.58.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "6bae95a7fbc2f6b207dc1b34143986df" }, { "dataPath": "params_shard_355.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.59.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "62c686f0eb51e72b81732e4d0fa48e04" }, { "dataPath": "params_shard_356.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4a29ade65ec4c06248fa509a9dddf7e1" }, { "dataPath": "params_shard_357.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "28bed2c7b3b1ecdf4b4d84b6c8093498" }, { "dataPath": "params_shard_358.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "26bba2dd3ca1f5993730a127ec469c31" }, { "dataPath": "params_shard_359.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "79b165d2163381b9f2946e97c33b877c" }, { "dataPath": "params_shard_360.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.59.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "c9c3584887279af8aa8975b33630dcbe" }, { "dataPath": "params_shard_361.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.60.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a5c35e28fe19aeac8809ca6fbc65812c" }, { "dataPath": "params_shard_362.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "46abccbeaada7f1686efa4d572f2288a" }, { "dataPath": "params_shard_363.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "2bfaf5563c4b8eb5ce36b2894be64da0" }, { "dataPath": "params_shard_364.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "7ea0ed2d45a0289cda776e163eb62f60" }, { "dataPath": "params_shard_365.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "acb215a9fb8d817566ec255a88741a4a" }, { "dataPath": "params_shard_366.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.60.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "24031b013402757a95b725d1d58c624b" }, { "dataPath": "params_shard_367.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.61.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "57b888959c536bcb8bf205cc7b50152d" }, { "dataPath": "params_shard_368.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d4be0bfad9a37360e8265ebb8312f851" }, { "dataPath": "params_shard_369.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa014397cc342d8cda1359d485dbc65e" }, { "dataPath": "params_shard_370.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "fc9fa177e2d442e80b365be7a2db9a92" }, { "dataPath": "params_shard_371.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "acfa98e682d887344f9b823ed57c7eb1" }, { "dataPath": "params_shard_372.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.61.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "9ac014c4df99567e2f0d5756e989a56e" }, { "dataPath": "params_shard_373.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.62.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "3d0caab08ff06305cbc7f314eb1b63c4" }, { "dataPath": "params_shard_374.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "03b557a65f7c6d9989c6979c272f4110" }, { "dataPath": "params_shard_375.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "5cfee4396006c3232b8e5cbc4195e4ce" }, { "dataPath": "params_shard_376.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "01068ac29c21f9bae4d47f07afcbd742" }, { "dataPath": "params_shard_377.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "84734260cde97db9b2a28884fb51840c" }, { "dataPath": "params_shard_378.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.62.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "4721356a89e76fae03225dc166c64b45" }, { "dataPath": "params_shard_379.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.63.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "2bd737a0768c57fc61767eedd3da1b24" }, { "dataPath": "params_shard_380.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "775c04cdade416b12a90f76182a0e236" }, { "dataPath": "params_shard_381.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "13e2e18669fa832fef5c03d4b7bf7af0" }, { "dataPath": "params_shard_382.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ebf12e03b354719fb76bf41855d9cca5" }, { "dataPath": "params_shard_383.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "4e2c7dfd6c7106cab04e9789c51b7ce0" }, { "dataPath": "params_shard_384.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.63.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "898ed8bebc824047cc7b661d3ecefaa9" }, { "dataPath": "params_shard_385.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.64.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "5f3ecdee3ce4233813abedf394b394e1" }, { "dataPath": "params_shard_386.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.64.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9113a6c2931b3e2729e6872f9e273d60" }, { "dataPath": "params_shard_387.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6cda3ba42c9dd2b0dfde3c0da425afdc" }, { "dataPath": "params_shard_388.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.64.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "951e7a961c4b2f401a35777ef037b17b" }, { "dataPath": "params_shard_389.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.64.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "227f7680ad6a40274c63e35d1a4dba94" }, { "dataPath": "params_shard_390.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.64.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.64.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "890e02eb523a0696740e51737221a9c9" }, { "dataPath": "params_shard_391.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.65.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "89bec5040dddfe37b27f8d40388b075b" }, { "dataPath": "params_shard_392.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.65.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "16da05d32abdb1ab271f1b8a3e9487d3" }, { "dataPath": "params_shard_393.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "6b519f1b4763fc05c1518c9daf5c62e6" }, { "dataPath": "params_shard_394.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.65.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5c2491aafa5d89eaee143f8986625202" }, { "dataPath": "params_shard_395.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.65.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f1388f14b8986ed0a660afd2dc29b339" }, { "dataPath": "params_shard_396.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.64.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.64.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.64.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.65.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.65.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "dd39b8a131088968d99fb6a49a31f3dd" }, { "dataPath": "params_shard_397.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.66.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "fe1799ce3b4f78bb76b2fc7b012ad469" }, { "dataPath": "params_shard_398.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.66.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2961a9e76013f4cc8cdbb438893d7f38" }, { "dataPath": "params_shard_399.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "3ca2f5aef783f3434a52c1be376c5beb" }, { "dataPath": "params_shard_400.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.66.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1d2d2a9ff595a15e18a1cb12f0e261c5" }, { "dataPath": "params_shard_401.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.66.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c6487217b5ca1d13e465239815e7d226" }, { "dataPath": "params_shard_402.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.65.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.65.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.65.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.66.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.66.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "1ab701cd78af3b96f24c728dae932c19" }, { "dataPath": "params_shard_403.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.67.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "64346add10c58ec087a6e2bad28ca61e" }, { "dataPath": "params_shard_404.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.67.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "4512af4fdcd68d523299ecef9c3eb1ee" }, { "dataPath": "params_shard_405.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "63ea311ecab57d56745039d9a5e44b25" }, { "dataPath": "params_shard_406.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.67.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "52243cbe67bf1680d3c2952996cb67cf" }, { "dataPath": "params_shard_407.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.67.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "d8836abae906061db23356323834aa9f" }, { "dataPath": "params_shard_408.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.66.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.66.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.66.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.67.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.67.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "3493add60010de68c1b0a1db8642f568" }, { "dataPath": "params_shard_409.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.68.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "d0362e21e91beeed36049e3cf86036d0" }, { "dataPath": "params_shard_410.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.68.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d06c6a731f5cb92546bebb797c3e8ee0" }, { "dataPath": "params_shard_411.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d3d4f562b872fcf671348de7958ba5e0" }, { "dataPath": "params_shard_412.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.68.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "392c6246e17701d8d58f3c9519fdf246" }, { "dataPath": "params_shard_413.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.68.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f7d9917f207a7971d0be89a473d6d8a2" }, { "dataPath": "params_shard_414.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.67.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.67.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.67.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.68.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.68.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "c4f23fb0c836de7034aaf5174e1b3993" }, { "dataPath": "params_shard_415.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.69.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1a1ef7b4b4311054fac6ddbfa332470a" }, { "dataPath": "params_shard_416.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.69.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "b943175c25499b4aa81d59703a90c36c" }, { "dataPath": "params_shard_417.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "80d85f785d6f039f694ade4180f5233e" }, { "dataPath": "params_shard_418.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.69.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "312b0ea77ec14fc0c9a2b3c730fa8a22" }, { "dataPath": "params_shard_419.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.69.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6d6095a425e03e721d25322d6bbdb313" }, { "dataPath": "params_shard_420.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.68.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.68.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.68.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.69.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.69.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "5907ba7f58fe04573480c56805cd06a3" }, { "dataPath": "params_shard_421.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.70.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "e22ff8ce332a0a73082cc7ded803b0a6" }, { "dataPath": "params_shard_422.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.70.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "701fa19f952ffea454247cf52683c941" }, { "dataPath": "params_shard_423.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "d37d1e41eb2c537081ea3da7a56fb83f" }, { "dataPath": "params_shard_424.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.70.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0a383acfadd7e07f5efbe8339b1ecb78" }, { "dataPath": "params_shard_425.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.70.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f05866dbb3cd92594a0a4ea098181859" }, { "dataPath": "params_shard_426.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.69.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.69.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.69.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.70.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.70.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "6cf50e9a91ef7182e0d4f9c14a7aebd0" }, { "dataPath": "params_shard_427.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.71.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "04592ca0cfa5ad637d7b707b3806d1f9" }, { "dataPath": "params_shard_428.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.71.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "77f73fa70592063a146c366942b6da09" }, { "dataPath": "params_shard_429.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "106e2e5288c084fdf24cb38606a3f6ff" }, { "dataPath": "params_shard_430.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.71.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b5f0bb8c86151fb049b73d0257f49934" }, { "dataPath": "params_shard_431.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.71.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "fe705a93811b97d41e68467724808329" }, { "dataPath": "params_shard_432.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.70.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.70.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.70.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.71.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.71.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "77aef38e7f0bf3439ab56bfae82b2446" }, { "dataPath": "params_shard_433.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.72.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "205573efdcf20a2a6f05971b038aa69a" }, { "dataPath": "params_shard_434.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.72.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6340eaa9051ce7af8695f9c70e9ca666" }, { "dataPath": "params_shard_435.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0c06b6f430a54095335b4bd3feb9428f" }, { "dataPath": "params_shard_436.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.72.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f24d07886cc45c582461dafe8f3992e3" }, { "dataPath": "params_shard_437.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.72.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9787e9629802560c47975b2246026c72" }, { "dataPath": "params_shard_438.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.71.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.71.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.71.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.72.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.72.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "eff87eb8df2fd9d416ac8fcb82ddc0ac" }, { "dataPath": "params_shard_439.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.73.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "1c344c707efc2fe7949af5faab0fc97b" }, { "dataPath": "params_shard_440.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.73.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f9e6d164781f9f01e1a1fb6eea09727b" }, { "dataPath": "params_shard_441.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "8d6b5201fcf0b5e4589add90db02c2df" }, { "dataPath": "params_shard_442.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.73.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "99be8f8ee7317bda642e36efd4b74633" }, { "dataPath": "params_shard_443.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.73.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "0da75744c385e5b76de32f2aea078a2d" }, { "dataPath": "params_shard_444.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.72.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.72.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.72.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.73.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.73.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "987169a64c7af0b910d9f9c40c08a1cb" }, { "dataPath": "params_shard_445.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.74.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "047612ebde13b6d550d4878147e5ae0c" }, { "dataPath": "params_shard_446.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.74.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "669694bce70a0a7bcc7f80797884ddbd" }, { "dataPath": "params_shard_447.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "87e6936a91fc544a2ebd06246fc21adc" }, { "dataPath": "params_shard_448.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.74.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d5fd7ab14db07f7450e499daf593d209" }, { "dataPath": "params_shard_449.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.74.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "79add4990b5f75f0954b9adc81a40bc5" }, { "dataPath": "params_shard_450.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.73.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.73.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.73.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.74.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.74.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "0946db163b8643fc06667225f843a26e" }, { "dataPath": "params_shard_451.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.75.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "a7ac0342ba94f26e80257cd4aeca0883" }, { "dataPath": "params_shard_452.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.75.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "66201ac4a8933ce649713bfceeae54bf" }, { "dataPath": "params_shard_453.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b8707b316c631457c70bd6c1b628cc64" }, { "dataPath": "params_shard_454.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.75.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "64d094fc97af0753742b339583737dc7" }, { "dataPath": "params_shard_455.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.75.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "09ca1a590ceb8711607a1a0a0dd51832" }, { "dataPath": "params_shard_456.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.74.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.74.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.74.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.75.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.75.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "adb4defec811675b023e97de7eb7c733" }, { "dataPath": "params_shard_457.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.76.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "947913fc58477ab9ffa47a5b89894d65" }, { "dataPath": "params_shard_458.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.76.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e2e6f1c52accfcfe9d82f10960dea6b5" }, { "dataPath": "params_shard_459.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "590199dca2c197446fbb85bd3ca43baa" }, { "dataPath": "params_shard_460.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.76.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cbcff28d1dbe1a67149eedbd0014372d" }, { "dataPath": "params_shard_461.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.76.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a9923af4b76a2e3e65e4861adb27d977" }, { "dataPath": "params_shard_462.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.75.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.75.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.75.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.76.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.76.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "c4ce9d733ba438afc2a724c135593f36" }, { "dataPath": "params_shard_463.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.77.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "244fb13d822be6c7d06898a735e60559" }, { "dataPath": "params_shard_464.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.77.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "a5853e7acdf93790a330f47580bfa51e" }, { "dataPath": "params_shard_465.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "0d5c22a0a5988d73d09471d89fc163c0" }, { "dataPath": "params_shard_466.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.77.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "18acd8799ca326ebbe3a2aff29e6e3ff" }, { "dataPath": "params_shard_467.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.77.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "7342c731c9ff15c5adaba8edf7b4be16" }, { "dataPath": "params_shard_468.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.76.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.76.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.76.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.77.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.77.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "cdd8135716bd2dc5b535341e3eac820c" }, { "dataPath": "params_shard_469.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.78.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "f8c80f59c4df01148f9c7aa0b4c025c2" }, { "dataPath": "params_shard_470.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.78.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "811de3bfa66f2f8b5dd4543560fc9d21" }, { "dataPath": "params_shard_471.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "61877e6bbdce8abddaa443d87579ad99" }, { "dataPath": "params_shard_472.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.78.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6866e9fecdfe546015f91f43a79ab4ce" }, { "dataPath": "params_shard_473.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.78.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a980cbc566cf413ef9ae6dcfd704e874" }, { "dataPath": "params_shard_474.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.77.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.77.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.77.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.78.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.78.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "bff4bb2bdb94554add45ff8cdd05d1e6" }, { "dataPath": "params_shard_475.bin", "format": "raw-shard", "nbytes": 41943040, "records": [ { "name": "model.layers.79.self_attn.qkv_proj.q_weight", "shape": [ 10240, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 41943040, "byteOffset": 0 } ], "md5sum": "531fb937911ccee7083556ee675b6b14" }, { "dataPath": "params_shard_476.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.79.self_attn.o_proj.q_weight", "shape": [ 8192, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "40962de371c53cf1376f752638a77bd8" }, { "dataPath": "params_shard_477.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_weight", "shape": [ 57344, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "935323b86bd8a4818704bb722b125f78" }, { "dataPath": "params_shard_478.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.79.mlp.gate_up_proj.q_scale", "shape": [ 57344, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "42678600f504d0ef41c6cc1537a33354" }, { "dataPath": "params_shard_479.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.79.mlp.down_proj.q_weight", "shape": [ 8192, 3584 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "c115c02ed782ff7ff9a7cfed9d686064" }, { "dataPath": "params_shard_480.bin", "format": "raw-shard", "nbytes": 24150016, "records": [ { "name": "model.layers.78.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.78.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.78.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.layers.79.self_attn.qkv_proj.q_scale", "shape": [ 10240, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 5242880, "byteOffset": 14712832 }, { "name": "model.layers.79.self_attn.o_proj.q_scale", "shape": [ 8192, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 4194304, "byteOffset": 19955712 } ], "md5sum": "fff910192a63a35b8b64b2ad87e7fa09" }, { "dataPath": "params_shard_481.bin", "format": "raw-shard", "nbytes": 525336576, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 1024 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 525336576, "byteOffset": 0 } ], "md5sum": "ffb9a4821e85c82ea4b4036509e192c1" }, { "dataPath": "params_shard_482.bin", "format": "raw-shard", "nbytes": 65667072, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 256 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 65667072, "byteOffset": 0 } ], "md5sum": "42dcf5cff51f226c09e1243f3df821ad" }, { "dataPath": "params_shard_483.bin", "format": "raw-shard", "nbytes": 14729216, "records": [ { "name": "model.layers.79.mlp.down_proj.q_scale", "shape": [ 8192, 896 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14680064, "byteOffset": 0 }, { "name": "model.layers.79.input_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14680064 }, { "name": "model.layers.79.post_attention_layernorm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14696448 }, { "name": "model.norm.weight", "shape": [ 8192 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 16384, "byteOffset": 14712832 } ], "md5sum": "f266c5ce63191a16a76da09c5e11b48a" } ] }