dusty-nv's picture
Upload folder using huggingface_hub
36ea62d verified
{
"metadata": {
"ParamSize": 325,
"ParamBytes": 4083949568.0,
"BitsPerParam": 4.068559606592764
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "90455a89d1f126d08436b3f9763bfd45"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
128256,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "e6b4a4d9bea78df69d0088aa994566eb"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "9b778597ab5ca87dc01e04221c45cb5b"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "e8e1bf18bec705ae1e772b4730919929"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "fa6f164217ade3bb34c156c4f286ab9c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2c6936ad435ad1c6b2ab47317331f196"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3cbda99e7efd5da49968b3c299407eee"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "e3f093b5b387544fcd0b3c7b3e6b6e04"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "b9d3c360a92dc0284f8ba7d37ef19d8b"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "42c0f06465b4d6e32469cca8acff7b25"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "b1a8ac60c0737659a44c189f115950cf"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fd3581a9e811c8dc1f233e304b4a13b3"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5ea15f4743ff8e15b00df1d3dfde33a8"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "51e49874df52cca7a4ba61e800b151ce"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1f438e2afbf2aa85acc612075229ece2"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "d225be884b5de1ca07c9fdfdaa1b72fa"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "ab1a7cb113084a2bfe82609ec0f270f0"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a9d1501cdb20522d8578d762010a9e66"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6a535989520729fe1ad108283412617c"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "56d48fa047896109a0843b116fd56699"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8e65c82daf533f41d1178a896edbc0ab"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3756430dec29dcf5749119711e7cde01"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "79d548683788092e7d319bba15560945"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "f26b83e431294734057c2db0763c85ac"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2cced24fdc07387133855b2a83ef9216"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "9d11449dff963ae5177bb3133fa296a7"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a8f516e7e20a894f60295b4c5b9a4333"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "58333f98f65816979f05aa195e553680"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "3d3102e49dca6b029257844d01fc5e9e"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "ddaa1c46e87cce229d39e9c1d32c09ba"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "ceb6060ba03935f48d9cf64f5615cc7f"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "ecaa5bf55729eb0234746f0ca6874f57"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "129f390132725a18ce5e8657be5f967b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "edae9498b651722087226b37f3a9aa73"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "e0617218474b6197a73dca78fd532b4f"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "46f9b29351334ec7067cf3439a14d3b7"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "79ebbd1a6cbc5773150c36ac23d1772c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "8e4bcbb077f78ea3f6b767a517e66719"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "879e51c4c5239b702d24a326cb464842"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "1b827c0ecc6a5767977f99e39d9c71a7"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "2b9439ab59b80669dc37fc84c840d856"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "c139c737a002ca943d9dd04d968e0433"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "2b3704b19fd8a5d3ef619b263984594d"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "b3b3cb890e090d651989c819a6623552"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "03eb592d6b98a4b8c412110c43785637"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "45524aef6c0af6d870b48e877b68026c"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "28653197dfa7031f95d4b0080194fc85"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "16c6de779331e7eaf86b3cb8c618138c"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "9efd65a7764ffbb32af9513533883a8e"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "ce2e038c019ccf3a617b095878179a35"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "76b2c7179bf554546b5856870c6562a4"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "419ccb3238310e2c1992323ef6d718a0"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "3dcf7222ed7dfe4a544e64df5af9c89b"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "5b3d755084a5092b8d530d13bab57f2e"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "a995bfca9163b9e9c7f15b3fc0ce76f7"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "a7917ff1b8e96d376ac45a7eabe663b0"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "d5df9afe508df9e79349479bf213579d"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "49955828a76f4de79a3e726e4009544e"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "5c94d5510d687646d5203149894ee6ef"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "764e9984c4138bbadb1c8d04e4bb6dc2"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "3d467cb74d79163ce0814cab942e616b"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "04ea55217a41c9b86ca838a255ff2e99"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "dc792cb72cdd3b0011cba2a0fcd481e6"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "019cba3ae7e301958093d326420a5733"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "4535d011c332afec1cce45b81a7b9a3b"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "fc22e0cbefe67293eda818abaad18ccb"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "944637e4ab87497197dd5281cbf45946"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "a3eb9dc9a7fa1e1eb2051c726c067294"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "304476e20392ae34bb400246b52f76e0"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0c36d92068dd2d6e5a19c5fc2bd58a53"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "a865e007db83e6c0445a5759002d13d7"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "8bca1047c80d899cf56a354f902c1749"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "03a2f1bea6d99dae9d63212f312d4ee6"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "ce9afcd235e30ca37998c4c9cf1e2495"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "74bbc3c83436d6221751022e64de7bbb"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "6a45b1511a7d226f881fa9f7719860b9"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "18c4ac71cff5ba0bcb5366277594ddc7"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "90de58cdb3d75ffb100d660740160c9d"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "465e2a5ab8372fac9d9944063e73dd65"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "0bf40d9d5464f14a685b1ed73e7b8f60"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.26.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "1adc3bde7e5441ae2b59a861ebd2e960"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.26.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "bde1e9a40dc8321dc28792d8a3d35585"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.26.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.26.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.26.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.26.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.26.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.26.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.26.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "f6997bdd67d2f53290979c1da5b523bf"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.27.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "76822e192a5f9c866542294df1600e0b"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.27.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "faf0b05547e7c32ff1a7cf03b5776c89"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.27.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.27.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.27.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.27.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.27.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.27.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.27.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "a45054b9e2efd9de6b862bf96a2a4960"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.28.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "7a00512156ce6043af21422e4ad1409b"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.28.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "0f3a7ade09af0b45bed61a82dfd024c1"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.28.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.28.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.28.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.28.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.28.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.28.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.28.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "24dc66cd9d9597f05d4849c97839dd30"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.29.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "07672ad529dd72b8a79faef3f6bc36b8"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.29.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "4d46cf56bb86b01c0ea48c13d4bff904"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.29.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.29.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.29.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.29.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.29.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.29.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.29.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "8853c2a39243f41eda78a7e083b1a240"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.30.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "2f10dedfd2699926d35e9abe35decc96"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.30.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "5f542b642be02c5549fa56e01fb9806a"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 21073920,
"records": [
{
"name": "model.layers.30.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.30.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.30.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.30.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.30.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.30.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.30.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
}
],
"md5sum": "169a901be44149d823b232c80c29af13"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 58720256,
"records": [
{
"name": "model.layers.31.mlp.gate_up_proj.q_weight",
"shape": [
4096,
14336
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 58720256,
"byteOffset": 0
}
],
"md5sum": "a11fafedf9e099beb3dc30a4e63f6d5c"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 29360128,
"records": [
{
"name": "model.layers.31.mlp.down_proj.q_weight",
"shape": [
14336,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 29360128,
"byteOffset": 0
}
],
"md5sum": "46d455edf96a732c75ea10d3468f5bca"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 262668288,
"records": [
{
"name": "lm_head.q_weight",
"shape": [
128256,
512
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 262668288,
"byteOffset": 0
}
],
"md5sum": "6319b5906b36a200a67e2a99678185e0"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 32833536,
"records": [
{
"name": "lm_head.q_scale",
"shape": [
128256,
128
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 32833536,
"byteOffset": 0
}
],
"md5sum": "51becee22d4a53fb23483900cf2507ae"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 21082112,
"records": [
{
"name": "model.layers.31.self_attn.qkv_proj.q_weight",
"shape": [
4096,
3072
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.self_attn.qkv_proj.q_scale",
"shape": [
1,
6144
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.31.self_attn.o_proj.q_weight",
"shape": [
4096,
2048
],
"dtype": "int8",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 12595200
},
{
"name": "model.layers.31.self_attn.o_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 20983808
},
{
"name": "model.layers.31.mlp.gate_up_proj.q_scale",
"shape": [
1,
28672
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 57344,
"byteOffset": 20992000
},
{
"name": "model.layers.31.mlp.down_proj.q_scale",
"shape": [
1,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21049344
},
{
"name": "model.layers.31.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21057536
},
{
"name": "model.layers.31.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21065728
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 21073920
}
],
"md5sum": "33788ae8cdaa0e8cde1145afef291ecb"
}
]
}