|
{ |
|
"metadata": { |
|
"ParamSize": 339, |
|
"ParamBytes": 29540067328.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1557135360, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
152064, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1557135360, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3a88fb162e09c74a3abdd7477505e1a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df3a4ded4a39115766a609f2c0580462" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b78e23a76882ab61b154c0a001228f85" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "72fbb1c01062828445b62a90c4353f05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e3fbaf2bd29d1d5298f43cd0ff3599d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7bc9a00ea9d41e99f304e235add2daf4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "360065f9e4f3a651c99f30a16c71c198" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c1e0f02c96434c8dfe3315372cdb8004" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dbc96a7c215f24808437b4b7dc7b20a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26d7bbefe41deb59a913d093eed4754e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5625f89c71728aea771affe1edb18295" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c79250b13bcef0cd2bc15e5ec596df62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50f940cb5fae4512e6c12541a5eb47d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7c381fc523d528decba45fcf8ed24aef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d0fb2a369d211259af03b60cffb48df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4887d0d496fa45e2379eae73aa12440e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc980f4e3f3734e5c3087c4498aebd60" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26e19d6e8b793b36a89a91b54d8a07e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4b13d03bd4a397a8d495f38ddc70e2bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aaf9e688818998f220eb93c6af3e7181" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ece5c0bd275d63f5240055b1ac3324c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c83d263e1a40a250b03a92ab62432865" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb07efb0c1877f74381fe36cea5d9011" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d976bf0b23d747669ad160a5d3622a36" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c41b64ad8715a585cb34c1ebd4cbbc86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "35de6e609963e053e0ded46ec8fe8cfe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2646569fee85209486632e9cd88101c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b66f11a12ca70744c65d391d15448c43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74c4cc417713e838d35e6571fed730b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aac399850a3b11fac05fe89177257b41" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b3f51237ca465cfef75391136e500b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3c7c8a23e8fb66985a250c88d801ee18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7544cb60cbdf162dccec5ba1e02673e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "780817bf2ee659ef375843987b1155f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38afd72b3e5f12694ca65ead5dfa2f2b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ee7ced9dc4e6aa52c30b6ec595a65670" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8e694e55d2388517a9f31b09c700b1c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8aa4410041635b582bfdd9b7a6dd2207" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f39ce6a56e668b3f7e413716e51e6a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c0506c3097a01f6573e1d26f254a7b20" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3335f73dea6fa12a8b3212f67fa81285" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7a5ece6089fbbef7f9f3664ee982372" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "737a0cfd3058d83685327ab6f23a7934" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "adc61cb390a7a1511617cf58362c4d3e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ff888c57e70cab750d7cd03bc48be148" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "167bdf016358e31de241ad822a29204f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ffca154630a8d323a17a3eb5172bf32" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7d1dd5c49945a3b2a4bccc636f990202" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15a365d8e7d5b780f75ab82aac097ead" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fd29f21b170842a5b1550a7bb52d0b6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7e9b7e16c2c962b9bbba24ffe57f3e33" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b0bbaa94b82fd3b274ce7a51050c29bf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ef86b8a74f9d0bcd40290808b2cdf100" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8858de54c7d70d98364320614f527a18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7fb935e2e824c267ef539e69d0770c12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23462cfcc965220204085ca0792c8fb7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "39dde34a88a9b9645eb63ea227f40843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9271e37db3b4dfc9d4153c9d51950005" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b76030a6a611e6358a551187a395b06b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ed733f59333c522233084071bc9ba796" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "391ce997155f35d9d33c5ecc2cdbab96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5669e4cd2aa14c64e2eff11602c3cb1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d385765e379af07cb0d870920fcbbe7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "747cab30aae941ddc3ec09e1f317ec12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d09eb939e64b431ed3880a452344808e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dec854422421a3526b393486657c991c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d59523157fa728c58e7a6d9fc75bd53c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "edc881657a32c0929f803cedc86e0cb7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "073c94707cf3a0efdd1e7b5ae1a019c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8729670930b5650085991796c9e7cb6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e73fee0733e0503ffc15fd6cde003906" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "af183caa22588cf60b99e7cfc8d4262d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c20a5305d5d4f3326691c6d9c9a646d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e82d628fb374e3ff254721f7dfdc6bf7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a09b81901e2daef5ef04a43acac37fa0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "558929d827029aa524ac3f2c91423a93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5de1e895c1d88bcce39c29a5397fbd11" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54462e1e575bdb839509eaceca779711" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a7977f2b39a9801d5db36c19c58ba388" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5742c19adf2f0edc65fd444496814f23" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9b49ec79da28ea4415211111142f059" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1059c0ca4ea5704e045a9e7351a990d1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4aec0fd023c6bc8d9fb14723fd12e79" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae244f80e2ef362547e1dd2ac2f22f5e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6892f1ef526cb7abdddb22eebdcc405" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0570289129c141a572be3812f754ba5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7d5883405f686528edb24c62e5582f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "603a7e971dee064fcb8d7e5665ffbcc2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a92cedf8adcf2fb76d4dffc511a57014" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ea74973ff9716671ea0814cce56f9a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2601a9c36c05ec87c91b96bb58af8a7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f3bafe83ae69e097b87ab1b9483783f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7fceb6e12fb536b761def09448e13d44" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "432e7f1d35a40da01c7e8c2445fbf97c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "568fc3ca58176204435e31fe66ba614b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8dd762698bbacb017c47190ff89c281c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e21e072ba3a987c54069be3dcd2701ed" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b4d329de0bb8e8012be1a624ab31e04e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a520124e2adb69666ee8dc9fb21d92d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7614fb83ab7f795ba84b01745714cdb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f45bcad59479493268db04749ac3049b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0e425f31e36be0657389c6178ed95844" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86a8533ae72d04a39a5d839512e2846d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e1f4f4ce59da9441f05c97740f014662" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d098fcd1b5aa9fae14be33501f638da8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b47c22469159c5c766adecc5ba81de68" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "025a2ae54a6e53e2ddde8a469b531cc5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b8fa2b033e638ac7360f093f38f9dee3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f45964ec921768872cc4dd9a61de056" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b96890eb6cb0c425b45c612fde18729" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "71d0a6797673c20948a8ab471ea61632" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "541d4fd435a74b9135314f869a64249b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47d63508fdc49053b467751eda9333b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14197d133a391c37e3f153595a63b270" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b41b9981cd1fcee3b9d41c0bf3b3cba" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1489f3044929a3f09e1e85a473f19d1f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7598a9db58afb3f78cc5c6e68a422fbc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c4cfb8e3c3439349e2c0b9f94745f62" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d823e8e89c349136995b6608bb54d3ef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f37368ead6d6cd0a7ec8629920b4e1df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "be1aca0897675e46781146c08b15eb3b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6356519ce46c68ab4cd035e045cf5ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0fe003424303aa6a854ba46cc14345b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67c97f0dba41d5ffb404c3a2bf7cc5ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f83aa366a50982a6baa9e5ee6683f344" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6478f4ffdff0a819be40b94d2ae85251" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "04cee52182f1c577347f63eb4ed32813" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e681a86d3803b271c9aad6ef5db5707d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd840f0352debab221d950f23d27cd4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5401c8e2e13cb25ed6c8a93999f7383a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f6512c417c5e89d6e6b4de85bd7a3e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c834c991794a3aa68ea51329e14352a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "059902fbab3d9ef2c8978ed615185b49" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8e2f6faba18f4a07e6a13630dab130d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb54aedbc12aa2cc97475e8ce9e75a12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54aee8cc6c9aa3a29f7a128e7fd787e3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cc9344e2a9e24540423016f868280265" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "20428c3aa723cf2a48713218b572a7be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "624daaf765af5f8c597ab22c698dfd65" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5eb235f294de6b672f49ca005997eb0c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aaf86ff912f84567bd626326d184ca12" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54eb544807bfcace5611ef4c3830701c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7fe9e7d88d4c3306e0d8cf7d3bec03ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e747119d14888ecbe15ceb0799cd41c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "924a6a93202997b987f3918537ffb4f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b3ade6ccff892d87bb2d9fcc54805bb8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b3fc5ea6610ae1e417f7ce1a6b8d8c9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37aec8b0407a511136baa1e93af69690" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c8b3cb2dc4b49ffcdaea101f1db160f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "627b763dc459c15ac716b020b49d8d9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0042c88298fe89a379f5a7a8cb69044" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f4a34b3d748f2ca1631a6121683a3cd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "61ab8cbf4c0056d1218645fd5940795a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "78c8f74a45f97bdf06fa7cce588a955d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bf1747918c42741fb0988f1a68562821" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ebae1b7cacd8f3be26aa80d31d07b38" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "09378421ef2bfd882c3245d74cb12255" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5970194f94b7efc7551c444a057b698" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ea7cf373a26752e80abbfee2fc386f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ed26e2ea4fae5bf5b7108c4fb55f4aa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e81e466ec6dc76123a9cb8522052c8b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f6bdc0b31737174643487471a49c55b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1d41e05d60ad6f7f5bc22fda7be2f813" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "73a4da4a11747ecf2ae31909307e2a88" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "837d67e7f1f64b4ca96ec26737ae4eb2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a20d6faa5e885c74548b87ef939ba6d9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6b37fc534c6c9c8fbff239d79c490358" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b44744e7b0d210345cf0c0f0a482f36f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50c9e4fe2082a2a599daf1ef409627c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5526c5388dded60dbb2f01494e453cd1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4f34fe62f2fc8bd592fa20de2f9a19d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "478983527ffeaa31fafc38ed9b290af9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c9ae989899b894e75bf8680e5d2a4651" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d26d2507231a307713567443ab446feb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "14b008b532a8a471a295d472c31955e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c2bb07bc281790fbca359c3950880fb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b7dcfd6f7eb313580139b35495566ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dd289ccab6f1b85c9e0f64a759ec3d5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "404e95fcc2116ec3c6fa8962e1ef8202" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69ad0fdd52c5568ddd1a2ce915394ba8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d17bbc896690d7ae32c9eaa8761ec309" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8660c8072cadfdc384217ca692045a9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50315613e2a8922d823529c7e7cc03b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb26712172cb7cb20bc8b90787b1406a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65f476f83843c64fbf72d0f201e1eee8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e549a4f423a1adeee32249f10d92f392" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e9d812cfaa8145f9d88ab1b210cfb196" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d3ecd3419fe478936278cddf13e3e4f2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7a127f7eab0ba1711f22a10bb80ff31c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 73400320, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.weight", |
|
"shape": [ |
|
7168, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 73400320, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2f35869a8a0cb4d37103275e717cab54" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 52428800, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.self_attn.o_proj.weight", |
|
"shape": [ |
|
5120, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 52428800, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b84398987a828e6c4ee161df9bfa1c4b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 283115520, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
27648, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 283115520, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "eb2a096ae21fd98e16ef05bcf267466f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 141557760, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.mlp.down_proj.weight", |
|
"shape": [ |
|
5120, |
|
13824 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 141557760, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f7e2c33a0ec9c9f509b5a4d8d41b26ec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1557135360, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
152064, |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1557135360, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b61f817b65ef3a1d1028e40275172163" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1681408, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 14336 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 24576 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 34816 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 49152 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 59392 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 69632 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 83968 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 94208 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 104448 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 118784 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 129024 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 139264 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 153600 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 163840 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 174080 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 188416 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 198656 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 208896 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 223232 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 233472 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 243712 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 258048 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 268288 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 278528 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 292864 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 303104 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 313344 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 327680 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 337920 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 348160 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 362496 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 372736 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 382976 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 397312 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 407552 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 417792 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 432128 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 442368 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 452608 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 466944 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 477184 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 487424 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 501760 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 512000 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 522240 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 536576 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 546816 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 557056 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 571392 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 581632 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 591872 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 606208 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 616448 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 626688 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 641024 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 651264 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 661504 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 675840 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 686080 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 696320 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 710656 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 720896 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 731136 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 745472 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 755712 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 765952 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 780288 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 790528 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 800768 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 815104 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 825344 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 835584 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 849920 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 860160 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 870400 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 884736 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 894976 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 905216 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 919552 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 929792 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 940032 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 954368 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 964608 |
|
}, |
|
{ |
|
"name": "model.layers.28.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 974848 |
|
}, |
|
{ |
|
"name": "model.layers.28.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 989184 |
|
}, |
|
{ |
|
"name": "model.layers.28.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 999424 |
|
}, |
|
{ |
|
"name": "model.layers.29.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1009664 |
|
}, |
|
{ |
|
"name": "model.layers.29.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1024000 |
|
}, |
|
{ |
|
"name": "model.layers.29.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1034240 |
|
}, |
|
{ |
|
"name": "model.layers.30.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1044480 |
|
}, |
|
{ |
|
"name": "model.layers.30.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1058816 |
|
}, |
|
{ |
|
"name": "model.layers.30.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1069056 |
|
}, |
|
{ |
|
"name": "model.layers.31.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1079296 |
|
}, |
|
{ |
|
"name": "model.layers.31.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1093632 |
|
}, |
|
{ |
|
"name": "model.layers.31.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1103872 |
|
}, |
|
{ |
|
"name": "model.layers.32.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1114112 |
|
}, |
|
{ |
|
"name": "model.layers.32.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1128448 |
|
}, |
|
{ |
|
"name": "model.layers.32.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1138688 |
|
}, |
|
{ |
|
"name": "model.layers.33.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1148928 |
|
}, |
|
{ |
|
"name": "model.layers.33.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1163264 |
|
}, |
|
{ |
|
"name": "model.layers.33.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1173504 |
|
}, |
|
{ |
|
"name": "model.layers.34.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1183744 |
|
}, |
|
{ |
|
"name": "model.layers.34.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1198080 |
|
}, |
|
{ |
|
"name": "model.layers.34.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1208320 |
|
}, |
|
{ |
|
"name": "model.layers.35.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1218560 |
|
}, |
|
{ |
|
"name": "model.layers.35.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1232896 |
|
}, |
|
{ |
|
"name": "model.layers.35.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1243136 |
|
}, |
|
{ |
|
"name": "model.layers.36.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1253376 |
|
}, |
|
{ |
|
"name": "model.layers.36.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1267712 |
|
}, |
|
{ |
|
"name": "model.layers.36.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1277952 |
|
}, |
|
{ |
|
"name": "model.layers.37.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1288192 |
|
}, |
|
{ |
|
"name": "model.layers.37.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1302528 |
|
}, |
|
{ |
|
"name": "model.layers.37.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1312768 |
|
}, |
|
{ |
|
"name": "model.layers.38.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1323008 |
|
}, |
|
{ |
|
"name": "model.layers.38.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1337344 |
|
}, |
|
{ |
|
"name": "model.layers.38.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1347584 |
|
}, |
|
{ |
|
"name": "model.layers.39.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1357824 |
|
}, |
|
{ |
|
"name": "model.layers.39.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1372160 |
|
}, |
|
{ |
|
"name": "model.layers.39.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1382400 |
|
}, |
|
{ |
|
"name": "model.layers.40.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1392640 |
|
}, |
|
{ |
|
"name": "model.layers.40.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1406976 |
|
}, |
|
{ |
|
"name": "model.layers.40.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1417216 |
|
}, |
|
{ |
|
"name": "model.layers.41.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1427456 |
|
}, |
|
{ |
|
"name": "model.layers.41.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1441792 |
|
}, |
|
{ |
|
"name": "model.layers.41.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1452032 |
|
}, |
|
{ |
|
"name": "model.layers.42.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1462272 |
|
}, |
|
{ |
|
"name": "model.layers.42.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1476608 |
|
}, |
|
{ |
|
"name": "model.layers.42.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1486848 |
|
}, |
|
{ |
|
"name": "model.layers.43.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1497088 |
|
}, |
|
{ |
|
"name": "model.layers.43.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1511424 |
|
}, |
|
{ |
|
"name": "model.layers.43.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1521664 |
|
}, |
|
{ |
|
"name": "model.layers.44.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1531904 |
|
}, |
|
{ |
|
"name": "model.layers.44.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1546240 |
|
}, |
|
{ |
|
"name": "model.layers.44.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1556480 |
|
}, |
|
{ |
|
"name": "model.layers.45.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1566720 |
|
}, |
|
{ |
|
"name": "model.layers.45.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1581056 |
|
}, |
|
{ |
|
"name": "model.layers.45.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1591296 |
|
}, |
|
{ |
|
"name": "model.layers.46.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1601536 |
|
}, |
|
{ |
|
"name": "model.layers.46.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1615872 |
|
}, |
|
{ |
|
"name": "model.layers.46.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1626112 |
|
}, |
|
{ |
|
"name": "model.layers.47.self_attn.c_attn.bias", |
|
"shape": [ |
|
7168 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 14336, |
|
"byteOffset": 1636352 |
|
}, |
|
{ |
|
"name": "model.layers.47.input_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1650688 |
|
}, |
|
{ |
|
"name": "model.layers.47.post_attention_layernorm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1660928 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
5120 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 10240, |
|
"byteOffset": 1671168 |
|
} |
|
], |
|
"md5sum": "94e49e672e625292e007ba7d61d222ff" |
|
} |
|
] |
|
} |