riczhou's picture
Upload folder using huggingface_hub
6bc81a2 verified
{
"metadata": {
"ParamSize": 533,
"ParamBytes": 9234108416.0,
"BitsPerParam": 5.001536828453907
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "7932312201ae2761485f7d412eb0dbfe"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
152064,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "e8920c096ece303715da29d808f834ba"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 20658176,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 14336
},
{
"name": "model.layers.0.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 18364416
}
],
"md5sum": "0b571f3cb6e619d96c6d40630c3be1ff"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0873eb6ddb15076ff9e499315425a609"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "538a9140fd4d851564a8e5686c890abd"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b459ee0e7734cf867044df5858581b06"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.1.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1a9657c43d2bea8e04a216f3be1b8e0f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "1cee758ccca6cbe36975f5901632cb6a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "23967261fbec14b496b7fab724ada7c6"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "4747ff8880b109fc7cdac257c72e0909"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.2.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a03c85f259f400f3ad626ed448f8de06"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "5d1502cee18ca38bf9569a4f2900d035"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7e822acd73e2feb8ca61f8aacdcc9f25"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c172f544d6b820912a5d1279126b07e3"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.3.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ea7119633580bd91b68e726d11ce41bf"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "edc4493565017e3614f855e31a8b9cea"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "e70639e61c5a6f828e70f53c956c7a5b"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "54cce2674637e5a9a9b724d8abd43d5b"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.4.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "62990000ac40d33b5e13e80a6794e67d"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9cb8aa1c245fb018feb6cbc935dbfe40"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "87ecc2cdf173992e37c6e22b776da3c1"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8e353bc00035e776d9db63e7d5196bb4"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.5.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "70bcb66e8cf3b0af54f269770cb4a75f"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "25d76912d6a3c86237e5cbc316967078"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "49d47a73555153bb2507b20d9bd04774"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "cbdf427b1e0acda5b18d6dcdb636460f"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.6.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "e44ccd97c2fcb8364ca54290494bff5f"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ced8e045499bec3933817d6953afe681"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "817fa4fc575c020e227a82065a0c0ebb"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c5087115c379bdb08e441defb3c6884c"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.7.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8626b889867fe49ec7f6423d10fbea38"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "08f7521d12af06f4dc3005b63467301e"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9083a533bca8129b86a454936dd3be31"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "05e0fb5fa6e0a151dbbd29826d43d1a2"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.8.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "79999b6e37dfb6ff24960a6ff5eae8f5"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "80e4fc34fb71582c2691088376138e7b"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b95bfd81c4a8d8176d170b1e6034ca7c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "95f356828a6135b35c54fa222f55c305"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.9.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "52842279650a298ddeba0d927e2ffbca"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ed6f01edc94144612c873b5c04b7ed3e"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d488cc2a2c313f52f333c0dc99274ee4"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.10.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d16baa55d9af207a1cbfb9976386735d"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.10.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "89e8cc5017ea54b8dfcb39190473b7b7"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "99a3e961945d71a0cec35bd853b33453"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "9c2be96534fae38bc36c210fa1c79ea9"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "2fe138d29ead13a985595e1bc8feb62e"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.11.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "3d3a7cb57e3525893704ef5b90577cf6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "aa05a36c803f9c3f170792e037edbdb7"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "30273495e93ff39265054d6e5dea870f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "e74089f716018d9bf10283b8d3ece561"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.12.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "16e47105dc1c0e85f0e994ebf39e953b"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8eb1ec86ab7eeb2a709f75a0bba77ee0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c0ec0c74c648b0f53a9097b90e280ae4"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "64aa5be14a58a85ed8a1bbe4489274c0"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.13.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "20f4b8fa85b48dd5d408c9006ea39730"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "763857f90cc3eab1994051b5d512edde"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b7894130f44658e78d760cd5dc55703d"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.14.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0831947aac962e67314d37cb09ddec32"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.14.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.14.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a82e7b0dbb93f2eec513d953d6d64cc3"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f5065c02abe656e842bb387bbf34e03d"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "05e657b42e97ae7ef1bb1fb6e34bf234"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.15.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "51f7126252aa56c433965174f6ae8eb1"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.15.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.15.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a99dc9d0a4f83c597d39f355d211261c"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "467aaf4204905861b799c59e892214c3"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7b8d903b91bbd9922e546bdb5d793b3c"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.16.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "af905a61d3e62e1c40d17e9fab5bd16b"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.16.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.16.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a94f062339b6a79734db0550f046531d"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "12c180ae705fcce191cbaff8f3cfbe4d"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f4e950d83deff4a32cba546438d5929a"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.17.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "82674b200d22e463869c3c43a39b665e"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.17.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.17.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "73699ef754e40aa5b8873257abce71bc"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "985b5840b8ce0e375de207b3a5f8c0de"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "1745418f4676400a521012ed8ee6f076"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.18.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "33800e5c32e92342b639417acd4dcfb1"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.18.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.18.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "d8eff008543147da2e0b50398c898e0f"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "4be508de3d9252438aba4f647de6264b"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a0da1461b1c30b225d718ba3f0b7af80"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.19.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f7d5d7e457dfd1682b881bc8680c3ad6"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.19.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.19.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1d817f83be19dca2dd0aa5fb9005e0e7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0fa9918f677c5a00f073d2e86ec8d7b3"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "80c2fad6fa1a0ba5d6066a9e259881f4"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.20.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0c6cd38a76fd67e2d7abc5d9b6d2f4cd"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.20.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.20.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "943bfdca7c057b2f1a7d2686821ad174"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f9a846261b8fc87f398ab0a1cc8fb787"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d618f1cae13906bee204a4da7da60711"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.21.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "6529c3c0446cb21b0526e3843b4fd948"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.21.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.21.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "15dbda57f274cf542618cd7d8f2642dd"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "84150ee0028376bdd451fc25900b4185"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "c8b8cea6013b424828512d64b2909012"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.22.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "b74ca5b23f0fa5238378a71c52495bcb"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.22.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.22.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8b428d8a7ed1dbbb3bd9f41a7781a026"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "08e69d08505661f7551a8c57d4316820"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "70b07c919d27e7fc5595380be4b158b6"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.23.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "51cd251c279e163f9a8b7f64b287ff37"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.23.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.23.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "bdd83cdfcc8c25d5df7d211b9abd1295"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "8a4642756149f1b308b86fdd97419214"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d95ba4f66725445a62546074ea53818d"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.24.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ff4b728677086705e97e96bdf9b6c29f"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.24.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.24.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "3d92c34837d04ac9473cd614178dbe9b"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ac4877752dfc1a86cbfe31f28bc6fdec"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5788e8512001ff0cd3563119964fe040"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.25.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8cd209791b3fe3f4c7fb7c2a9f5bcffe"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.25.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.25.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1cc83de03adc4be34a9f101d8458bed7"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "fd1bbffefbaf25795ef7f378c0ebe4f9"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8b48d937058816c233df2b2cb7e21371"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.26.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "7cc7456591121088da5eee758d724977"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.26.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.26.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "fbe3172066f94391ec218094d47effe0"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9118afaa62dcbdcb41c31eadce88a92f"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "bf5757fbe6013d4e2b483c6d9d2042cd"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.27.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "23a0b2af24c9ce6480e89d7c4fdc3420"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.27.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.27.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "7cf3d78301737f206aefa5b477ff8fc3"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "61daafc8f43b59c6d308e2633a0895a6"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2c26d21b1c63f596add98bb5143492c6"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.28.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "15ea8d0201aab14cc89e5cb37785f977"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.28.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.28.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "06cbe75a6c667f5efab284447f13a0e6"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "d80b98733c25d4f79cdf2be5f080a042"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "abb4e1d206b66220eed4d8bdc889b08c"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.29.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8c8b216208b08dcaf88b2146c28bdb7d"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.29.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.29.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "6dbe8312f9b5e1f0982e8b892756f968"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "901d320d6c86427c0edf144d3d2a0b5c"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2a24bc0e1dd267b68c7a1fe177d25d53"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.30.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "3977a531dafa2b60aee26c7b7f5dd9ff"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.30.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.30.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "9d861f7c2991350e657600955ee7a929"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f79c4877efac93448bbce943e63e14a0"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "507be0ec8d8672267c475fbc32deef1c"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.31.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "605496ad25e251edbc9462e980b474a8"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.31.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.31.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "4a6c3e38d3efc654ab663dd5756ab24c"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "3a3dcfa6f5882671de2bcf6ef7713d15"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "5eee66ca8e0477316ed1c5f2d0514ad4"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.32.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "ce5ec5b9485fff8f11f13600b17c2fdb"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.32.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.32.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "da8a6a861570c85ebd946ecb484c31cf"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.32.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b9478d35558d6401767c99879841eb01"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.32.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "3664490b9465d3195ff797bf2a9cd057"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.33.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "d8d5493dbbd4f93991a39712e830a5f1"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.32.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.32.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.32.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.32.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.32.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.32.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.33.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.33.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ec855fb0d071f621cd9a8cb25a548508"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.33.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "b8cbdd03ec81e73ebdcade755d835348"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.33.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "def6a9c5e8f6dda0a4bee7f77c2a17c3"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.34.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "71b3d1f9f01f374a24e8addc59c1e02f"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.33.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.33.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.33.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.33.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.33.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.33.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.34.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.34.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "7b9fee89096d9c8dd9db27bbf8b9506e"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.34.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "df7bb42db4210ca3d9728960aed736b4"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.34.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "cf98ffea17c329afdf228eece5813470"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.35.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "962959737c5f9b032aa38df86940f7a4"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.34.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.34.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.34.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.34.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.34.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.34.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.35.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.35.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "d876828df99073e204111cbccccf1817"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.35.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "e0f66ff197c46575e6ceed2562dac7f4"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.35.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2e17099e223959ed75ecfed3e4a20833"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.36.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "8ac3e43c814a1338d7aae590b2133039"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.35.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.35.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.35.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.35.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.35.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.35.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.36.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.36.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "a706c2e4e9360bf828d25e5215d0f8c6"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.36.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ed6dde365aea49fd12a58526df50e537"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.36.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "12046d09e74cf4be566b5ce91f8267d1"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.37.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "78ba5b4f37352c7e7367add8a4e04486"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.36.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.36.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.36.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.36.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.36.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.36.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.37.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.37.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "1a64525c20202bde63132e15e2bad4d1"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.37.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "aea9c0bd272b054b31b300d9a1cd80c6"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.37.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "8a7b7b3bc7accdc806222461d7931365"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.38.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "485e616559fe9c2c444f7cbbe2e66e1c"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.37.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.37.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.37.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.37.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.37.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.37.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.38.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.38.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "920dfd9b5496e048ea6fd82ce2b8750a"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.38.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c595d4e3df3def50b47418cfdff16e26"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.38.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "436a74975377a0e16e0efc983cde09de"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.39.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0bd1df8b32b7f3ddd019cb2aae21128c"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.38.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.38.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.38.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.38.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.38.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.38.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.39.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.39.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "15d886d86562b7fe39046afdeabbcd32"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.39.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "ad01b50c27ca63ffe7f5f3df9bf8290d"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.39.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "edf503612f59edf7d6355eca975b6141"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.40.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f5bc34da4a881064e2aff08a2ac7c1f9"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.39.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.39.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.39.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.39.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.39.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.39.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.40.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.40.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "eaa533f9ce5908ecf60ccd7dc80881d0"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.40.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f0fe216d86609c630df4af6de5aa0b6a"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.40.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "a8682b6da6ae4bb1b3df5ff71e63c39b"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.41.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "f8b230bc58165f171686c2c219e2b827"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.40.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.40.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.40.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.40.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.40.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.40.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.41.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.41.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "23e05cbb42f9b1c85aff4ff74c929800"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.41.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "c17b864491ea836dc561700086748c81"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.41.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "d99c2609a8e53e906b98dd4fdf4e0c31"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.42.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "19ab27129ee5a4329c92d614262339b9"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.41.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.41.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.41.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.41.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.41.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.41.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.42.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.42.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "965a5e9cf3b3dcf5aac6688a8dcbb28b"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.42.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "9adaca8fa50e876723892fb3601b8b3c"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.42.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "f379aa793cb2c8da4b4f674767a64b53"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.43.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "0cc2e3704a25930074bf3c4146daf587"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.42.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.42.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.42.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.42.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.42.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.42.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.43.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.43.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "ceeb7fa446b82f5519611fb75bcb1e73"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.43.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "791b43015f4f501314ab722e388b6225"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.43.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "2cde58856c9af9c8ec0106197b148850"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.44.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "98190f3a4f88dede59f598ddb91b2b94"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.43.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.43.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.43.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.43.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.43.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.43.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.44.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.44.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "8e56f2a4be6b75ccf50158f14dd76acb"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.44.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "2318d5f199693812f581fac9f1b04e6b"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.44.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "b19ab81c417bff65aeee25784e74f8a2"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.45.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "1346a74cb2b723f7dae8738092fc9e27"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.44.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.44.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.44.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.44.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.44.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.44.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.45.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.45.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "c4718063906715e3004929d9116bd783"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.45.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "0cfd8b0f322d50ef664a345575f0b9b9"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.45.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "55510bc907509fa6c26b09d8d66e8879"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.46.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "32deb7b195fcaabff18a174d9bbed193"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.45.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.45.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.45.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.45.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.45.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.45.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.46.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.46.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "76cb6e7164301dd19bf705aff1f26e5d"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.46.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "736efad18492120448e43c776b7e9a25"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.46.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "fb3c5f9e9c7663b8d13882f22a0f2aa4"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 18350080,
"records": [
{
"name": "model.layers.47.self_attn.c_attn.q_weight",
"shape": [
7168,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18350080,
"byteOffset": 0
}
],
"md5sum": "c287e0602a073d45d28d3d3f1dfd9dd0"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 30345216,
"records": [
{
"name": "model.layers.46.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.46.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.46.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.46.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.46.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.46.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.layers.47.self_attn.c_attn.bias",
"shape": [
7168
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 14336,
"byteOffset": 28037120
},
{
"name": "model.layers.47.self_attn.c_attn.q_scale",
"shape": [
7168,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 2293760,
"byteOffset": 28051456
}
],
"md5sum": "934be68f81995621154452adad6676fd"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 70778880,
"records": [
{
"name": "model.layers.47.mlp.gate_up_proj.q_weight",
"shape": [
27648,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 70778880,
"byteOffset": 0
}
],
"md5sum": "f14e9a08a484031adf3a07f050e6ff24"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 35389440,
"records": [
{
"name": "model.layers.47.mlp.down_proj.q_weight",
"shape": [
5120,
1728
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 35389440,
"byteOffset": 0
}
],
"md5sum": "7939af8318c6096f44610ae6f411f696"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 389283840,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
152064,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 389283840,
"byteOffset": 0
}
],
"md5sum": "63a4219bd8f0d5088b95588bd567597a"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 48660480,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
152064,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 48660480,
"byteOffset": 0
}
],
"md5sum": "f082397dddce00dec18bdcdf0448c1f2"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 28047360,
"records": [
{
"name": "model.layers.47.self_attn.o_proj.q_weight",
"shape": [
5120,
640
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 13107200,
"byteOffset": 0
},
{
"name": "model.layers.47.self_attn.o_proj.q_scale",
"shape": [
5120,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 1638400,
"byteOffset": 13107200
},
{
"name": "model.layers.47.mlp.gate_up_proj.q_scale",
"shape": [
27648,
160
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 8847360,
"byteOffset": 14745600
},
{
"name": "model.layers.47.mlp.down_proj.q_scale",
"shape": [
5120,
432
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 4423680,
"byteOffset": 23592960
},
{
"name": "model.layers.47.input_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28016640
},
{
"name": "model.layers.47.post_attention_layernorm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28026880
},
{
"name": "model.norm.weight",
"shape": [
5120
],
"dtype": "float32",
"format": "f32-to-bf16",
"nbytes": 10240,
"byteOffset": 28037120
}
],
"md5sum": "97c68ec5e6f50b02655cd803ec64874c"
}
]
}