openchat-3.5-0106-w4a16g128asym / ndarray-cache.json
numen-tech's picture
Update model
a31a765
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 3734421760.0,
"BitsPerParam": 3.0339086581997106
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 65540096,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
32002,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65540096,
"byteOffset": 0
}
],
"md5sum": "56833a7921a8a4421b7a449d285fdac7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dc01a9b2e6975ad42aae130b90ca6a59"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33267840,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
32002,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048128,
"byteOffset": 0
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2048128
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 2056320
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3891328
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 3899520
},
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 3907712
}
],
"md5sum": "4bb2705bc6d87d5413cef4a261c3f005"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "52ca4afccfc3ed4cc205d4589472a7c7"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3c23781e898ad1a4d72b62200f8f50cb"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "53db3e4df12645a99c6bbf2a41711d6e"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 27156480,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 917504
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 2752512
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 2760704
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 15343616
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 15736832
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 24125440
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 24395776
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 25313280
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 27148288
}
],
"md5sum": "1e86dd0a3f39b25dc066be9ba9a5c255"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4a542de356bf260dc1af31b6729ba469"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "812330933a07b69874b196fb50bc82df"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "55ffca70a7bbd2b6b15d629442c22876"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "45f6ace729989768d5ee9449390d0902"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "6975df31884b9067e4d06c144cd2674d"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "2eceee04604218d0855667479cc4444a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d30d3e2441d6dc981fe80b2d8e6c7d79"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f762bfb307fbe9002394dc96f85f7c4c"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "ad6828020c4c48e325365fb1b7032ca4"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "881d9d8071825c90d3ab210c5c0f9aba"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "30c1873b98c32b119adfb3c6da320e25"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "6fc81c62fe1d375d2df8f8aa4f180e41"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "be97f7ae51b10dfa926916ed330fdde4"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ab018691945ff633c1e29176ba13f97c"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "2450b2516522508317fe1f93afbe15a6"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "00e5460db2e383194451af2ed9598c2c"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ce455a6ed74c53852ba84b5baca2b81e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "17a58c8217a758d561ab4714fde740d4"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0db84179132fe302f7996f22b75a253b"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "67a93905954ff0b1d08c917eb36620d0"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "188b7b11de48ae165f61b8768e1764cc"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a2b0bc65ad56d69fed220fc0483fbd9f"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "e1f99acd8fe39a64edf3f500f4b52ffa"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "7173240b70fede652124ddb2bf81fed3"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 65540096,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
32002,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 65540096,
"byteOffset": 0
}
],
"md5sum": "4fdbd44fa38dfd48055e23a938d61b1d"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "719b220238a02ef815d6e31e13ce528d"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b730e2f62adf34810284687221d1a73e"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 26452096,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.embed_tokens.q_scale",
"shape": [
32002,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 2048128,
"byteOffset": 21635072
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 23683200
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 23691392
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 24608896
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 26443904
}
],
"md5sum": "ac5361e95cf27eff12b1e6da8885c4b6"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "fbc7e9db9f42b55f2a407d0a09bf1782"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "792daaa35b502a722862c34452e1e42e"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "a3eed6594978b551687a9aed9f80c65c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "110c5de0b3918b94ce0f19e399975d85"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 23461888,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 21626880
}
],
"md5sum": "08212bdf8eeb95da934a246b75804345"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "18c1d41e9af0e97003e9bc77b2ce361a"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d7d2360488b874dbcdadc35cf28ece87"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "e6a1e476fd537febd214b41214c742ed"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "cdb226df8fef9e187770957cec017bdf"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "3ea93342708442ab731190e74389063e"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "e4a6b6410f93cebee6ff0021e53c8b19"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "020ab73febcae6fa7349b40b288d5e01"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "af2aaa6a5c00ac0d8f75aad350876d8d"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "46a7c35aa7d94186a2ce55c7c96926f5"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "86203a657fb29be8caea857769d98851"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9c95a9c9867c899f2ae49905d12bbcc5"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "75b5b50047264924b1e45aa5b65a86ff"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9067d4de0426f83741b30226deefe28f"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "aaa437c2ccf6ae22baa339e98fb33b18"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "f03d2277af59c7f0b6bc062a2c3af860"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d4f81306727708284297a2ab4d284407"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "df798011e810562258544ae52e776d21"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "31c4797c77bae77848b2d06f34f9e58a"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5404ae5c4df0edb3b9d5efe3588646ab"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "90c9e9e52cf222c429c836cb0977abac"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "2c1d8f0c9b0f1f811246e3758bfc6fa3"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ed73c1e5437b96225a29cc8e7c16ac08"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1ec828ab225692216555d804d7f42c92"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "caa5c11c9835993595ff2a3c1317400b"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2569480618f773636705baab71cc4074"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2feffa7977a63268c2c52e1143549f4b"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "98cd8ce3ddc89d04ad45c9e57219da13"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25329664,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22552576
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 22560768
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 22568960
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 23486464
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 25321472
}
],
"md5sum": "c5ff8898dfff8fa8aa410b3121d8bb9d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "561d70b04f5ca77b86e4fddf283d0bc2"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "681b74fd7b0fc616a2289ff20a8ac383"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "287a877e7693848406fbb2ed7134d6e4"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5afa7f3040ab8cabfd597bc049790fd8"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1892d0c59f1704a359749ee1820342ef"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "a8163bce4b0917b73385159381444d0d"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e849d104fd6b0ae75f664b107d7060e3"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "58c625a71637c4aec1bc0bacaf1d711f"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "c3726b39e892516e4cae5b73f5b1c2ae"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "50861f07837af2d513baf5aa9daa5bc0"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fb6874bd9d4263d05cb68a0926863816"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "be15024739461158b0920b509fec381d"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "751ce0ee773af13e06f02ede68f3eeed"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9ae2de30eaf8508f4b80c33c16ece290"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "6c83958aef13e1a49f68c6c3e09b4cdb"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "027d44bfa2076343bda4fb48896a31a0"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "0c97a06e8ebf730ed9b370e90b633844"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "abe5bc5b43729de67af58628c60f8e67"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "836a5d8b5bc0f5ec936ab82e26d3c0ad"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9d4085a04c5ad47b4121ed1a5a2ca7c2"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "cc3407074dbe132efbe493ea57ea74d7"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "340b7a0f239e5c5701092b52c2434cb6"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "68d7e1da31fe18941499441abcbc6a27"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "0eb6b62cba1d37c0cdc276b66c5dd436"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d7249378089fc86b0f66233b20a78329"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
28672,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2017eeefbcc062e4ddb1aa6aea4aeca1"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 24395776,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21626880
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21635072
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
28672,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1835008,
"byteOffset": 22552576
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24387584
}
],
"md5sum": "1e801e63286cf3090bbd21a9a29c7aaf"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
4096,
1792
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a82dc5a9293487560ad51ac84d073547"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 22544384,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
4096,
112
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 917504,
"byteOffset": 21626880
}
],
"md5sum": "d4620b03e0fd6302868a38bd357dc9f8"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 21626880,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
6144,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
6144,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 393216,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12976128
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
4096,
32
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144,
"byteOffset": 21364736
}
],
"md5sum": "462e7ecbf113c50fe31795499409ab46"
}
]
}