{ "metadata": { "ParamSize": 339, "ParamBytes": 29540067328.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "e3a88fb162e09c74a3abdd7477505e1a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "df3a4ded4a39115766a609f2c0580462" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b78e23a76882ab61b154c0a001228f85" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "72fbb1c01062828445b62a90c4353f05" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e3fbaf2bd29d1d5298f43cd0ff3599d9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7bc9a00ea9d41e99f304e235add2daf4" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "360065f9e4f3a651c99f30a16c71c198" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c1e0f02c96434c8dfe3315372cdb8004" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2dbc96a7c215f24808437b4b7dc7b20a" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "26d7bbefe41deb59a913d093eed4754e" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "5625f89c71728aea771affe1edb18295" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c79250b13bcef0cd2bc15e5ec596df62" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "50f940cb5fae4512e6c12541a5eb47d8" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "7c381fc523d528decba45fcf8ed24aef" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1d0fb2a369d211259af03b60cffb48df" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "4887d0d496fa45e2379eae73aa12440e" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dc980f4e3f3734e5c3087c4498aebd60" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "26e19d6e8b793b36a89a91b54d8a07e9" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4b13d03bd4a397a8d495f38ddc70e2bf" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "aaf9e688818998f220eb93c6af3e7181" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ece5c0bd275d63f5240055b1ac3324c6" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "c83d263e1a40a250b03a92ab62432865" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "cb07efb0c1877f74381fe36cea5d9011" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d976bf0b23d747669ad160a5d3622a36" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c41b64ad8715a585cb34c1ebd4cbbc86" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "35de6e609963e053e0ded46ec8fe8cfe" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "2646569fee85209486632e9cd88101c9" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b66f11a12ca70744c65d391d15448c43" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "74c4cc417713e838d35e6571fed730b8" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aac399850a3b11fac05fe89177257b41" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1b3f51237ca465cfef75391136e500b8" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "3c7c8a23e8fb66985a250c88d801ee18" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7544cb60cbdf162dccec5ba1e02673e0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "780817bf2ee659ef375843987b1155f9" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "38afd72b3e5f12694ca65ead5dfa2f2b" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ee7ced9dc4e6aa52c30b6ec595a65670" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8e694e55d2388517a9f31b09c700b1c5" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8aa4410041635b582bfdd9b7a6dd2207" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7f39ce6a56e668b3f7e413716e51e6a5" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c0506c3097a01f6573e1d26f254a7b20" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3335f73dea6fa12a8b3212f67fa81285" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d7a5ece6089fbbef7f9f3664ee982372" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "737a0cfd3058d83685327ab6f23a7934" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "adc61cb390a7a1511617cf58362c4d3e" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ff888c57e70cab750d7cd03bc48be148" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "167bdf016358e31de241ad822a29204f" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6ffca154630a8d323a17a3eb5172bf32" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7d1dd5c49945a3b2a4bccc636f990202" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "15a365d8e7d5b780f75ab82aac097ead" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4fd29f21b170842a5b1550a7bb52d0b6" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7e9b7e16c2c962b9bbba24ffe57f3e33" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b0bbaa94b82fd3b274ce7a51050c29bf" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ef86b8a74f9d0bcd40290808b2cdf100" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8858de54c7d70d98364320614f527a18" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7fb935e2e824c267ef539e69d0770c12" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "23462cfcc965220204085ca0792c8fb7" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "39dde34a88a9b9645eb63ea227f40843" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9271e37db3b4dfc9d4153c9d51950005" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b76030a6a611e6358a551187a395b06b" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ed733f59333c522233084071bc9ba796" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "391ce997155f35d9d33c5ecc2cdbab96" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e5669e4cd2aa14c64e2eff11602c3cb1" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d385765e379af07cb0d870920fcbbe7a" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "747cab30aae941ddc3ec09e1f317ec12" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d09eb939e64b431ed3880a452344808e" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "dec854422421a3526b393486657c991c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d59523157fa728c58e7a6d9fc75bd53c" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "edc881657a32c0929f803cedc86e0cb7" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "073c94707cf3a0efdd1e7b5ae1a019c6" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8729670930b5650085991796c9e7cb6b" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e73fee0733e0503ffc15fd6cde003906" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "af183caa22588cf60b99e7cfc8d4262d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c20a5305d5d4f3326691c6d9c9a646d1" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e82d628fb374e3ff254721f7dfdc6bf7" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a09b81901e2daef5ef04a43acac37fa0" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "558929d827029aa524ac3f2c91423a93" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5de1e895c1d88bcce39c29a5397fbd11" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "54462e1e575bdb839509eaceca779711" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "a7977f2b39a9801d5db36c19c58ba388" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5742c19adf2f0edc65fd444496814f23" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "b9b49ec79da28ea4415211111142f059" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1059c0ca4ea5704e045a9e7351a990d1" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "c4aec0fd023c6bc8d9fb14723fd12e79" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "ae244f80e2ef362547e1dd2ac2f22f5e" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f6892f1ef526cb7abdddb22eebdcc405" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e0570289129c141a572be3812f754ba5" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b7d5883405f686528edb24c62e5582f7" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "603a7e971dee064fcb8d7e5665ffbcc2" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a92cedf8adcf2fb76d4dffc511a57014" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9ea74973ff9716671ea0814cce56f9a6" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b2601a9c36c05ec87c91b96bb58af8a7" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "1f3bafe83ae69e097b87ab1b9483783f" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7fceb6e12fb536b761def09448e13d44" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "432e7f1d35a40da01c7e8c2445fbf97c" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "568fc3ca58176204435e31fe66ba614b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "8dd762698bbacb017c47190ff89c281c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e21e072ba3a987c54069be3dcd2701ed" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b4d329de0bb8e8012be1a624ab31e04e" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "3a520124e2adb69666ee8dc9fb21d92d" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "7614fb83ab7f795ba84b01745714cdb6" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f45bcad59479493268db04749ac3049b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0e425f31e36be0657389c6178ed95844" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "86a8533ae72d04a39a5d839512e2846d" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e1f4f4ce59da9441f05c97740f014662" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d098fcd1b5aa9fae14be33501f638da8" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b47c22469159c5c766adecc5ba81de68" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "025a2ae54a6e53e2ddde8a469b531cc5" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b8fa2b033e638ac7360f093f38f9dee3" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7f45964ec921768872cc4dd9a61de056" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "1b96890eb6cb0c425b45c612fde18729" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "71d0a6797673c20948a8ab471ea61632" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "541d4fd435a74b9135314f869a64249b" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "47d63508fdc49053b467751eda9333b7" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "14197d133a391c37e3f153595a63b270" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.28.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0b41b9981cd1fcee3b9d41c0bf3b3cba" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "1489f3044929a3f09e1e85a473f19d1f" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7598a9db58afb3f78cc5c6e68a422fbc" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4c4cfb8e3c3439349e2c0b9f94745f62" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.29.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "d823e8e89c349136995b6608bb54d3ef" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "f37368ead6d6cd0a7ec8629920b4e1df" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "be1aca0897675e46781146c08b15eb3b" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d6356519ce46c68ab4cd035e045cf5ec" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.30.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "0fe003424303aa6a854ba46cc14345b1" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "67c97f0dba41d5ffb404c3a2bf7cc5ce" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f83aa366a50982a6baa9e5ee6683f344" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6478f4ffdff0a819be40b94d2ae85251" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.31.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "04cee52182f1c577347f63eb4ed32813" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e681a86d3803b271c9aad6ef5db5707d" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bd840f0352debab221d950f23d27cd4b" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5401c8e2e13cb25ed6c8a93999f7383a" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.32.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4f6512c417c5e89d6e6b4de85bd7a3e2" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "8c834c991794a3aa68ea51329e14352a" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "059902fbab3d9ef2c8978ed615185b49" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e8e2f6faba18f4a07e6a13630dab130d" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.33.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "cb54aedbc12aa2cc97475e8ce9e75a12" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "54aee8cc6c9aa3a29f7a128e7fd787e3" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cc9344e2a9e24540423016f868280265" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "20428c3aa723cf2a48713218b572a7be" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.34.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "624daaf765af5f8c597ab22c698dfd65" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "5eb235f294de6b672f49ca005997eb0c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "aaf86ff912f84567bd626326d184ca12" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "54eb544807bfcace5611ef4c3830701c" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.35.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "7fe9e7d88d4c3306e0d8cf7d3bec03ca" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "e747119d14888ecbe15ceb0799cd41c9" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "924a6a93202997b987f3918537ffb4f8" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "b3ade6ccff892d87bb2d9fcc54805bb8" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.36.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6b3fc5ea6610ae1e417f7ce1a6b8d8c9" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "37aec8b0407a511136baa1e93af69690" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8c8b3cb2dc4b49ffcdaea101f1db160f" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "627b763dc459c15ac716b020b49d8d9e" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.37.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e0042c88298fe89a379f5a7a8cb69044" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "6f4a34b3d748f2ca1631a6121683a3cd" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "61ab8cbf4c0056d1218645fd5940795a" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "78c8f74a45f97bdf06fa7cce588a955d" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.38.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "bf1747918c42741fb0988f1a68562821" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "1ebae1b7cacd8f3be26aa80d31d07b38" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "09378421ef2bfd882c3245d74cb12255" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f5970194f94b7efc7551c444a057b698" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.39.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1ea7cf373a26752e80abbfee2fc386f2" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "6ed26e2ea4fae5bf5b7108c4fb55f4aa" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e81e466ec6dc76123a9cb8522052c8b2" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.40.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f6bdc0b31737174643487471a49c55b3" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.40.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "1d41e05d60ad6f7f5bc22fda7be2f813" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "73a4da4a11747ecf2ae31909307e2a88" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "837d67e7f1f64b4ca96ec26737ae4eb2" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.41.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a20d6faa5e885c74548b87ef939ba6d9" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.41.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "6b37fc534c6c9c8fbff239d79c490358" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "b44744e7b0d210345cf0c0f0a482f36f" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "50c9e4fe2082a2a599daf1ef409627c6" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.42.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5526c5388dded60dbb2f01494e453cd1" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.42.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "4f34fe62f2fc8bd592fa20de2f9a19d8" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "478983527ffeaa31fafc38ed9b290af9" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c9ae989899b894e75bf8680e5d2a4651" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.43.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d26d2507231a307713567443ab446feb" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.43.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "14b008b532a8a471a295d472c31955e2" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "c2bb07bc281790fbca359c3950880fb6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5b7dcfd6f7eb313580139b35495566ec" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.44.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "dd289ccab6f1b85c9e0f64a759ec3d5c" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.44.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "404e95fcc2116ec3c6fa8962e1ef8202" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "69ad0fdd52c5568ddd1a2ce915394ba8" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d17bbc896690d7ae32c9eaa8761ec309" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.45.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8660c8072cadfdc384217ca692045a9e" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.45.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "50315613e2a8922d823529c7e7cc03b2" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "bb26712172cb7cb20bc8b90787b1406a" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "65f476f83843c64fbf72d0f201e1eee8" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.46.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "e549a4f423a1adeee32249f10d92f392" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.46.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "e9d812cfaa8145f9d88ab1b210cfb196" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "d3ecd3419fe478936278cddf13e3e4f2" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7a127f7eab0ba1711f22a10bb80ff31c" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.47.self_attn.c_attn.weight", "shape": [ 7168, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "2f35869a8a0cb4d37103275e717cab54" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 52428800, "records": [ { "name": "model.layers.47.self_attn.o_proj.weight", "shape": [ 5120, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 52428800, "byteOffset": 0 } ], "md5sum": "b84398987a828e6c4ee161df9bfa1c4b" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 283115520, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.weight", "shape": [ 27648, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 283115520, "byteOffset": 0 } ], "md5sum": "eb2a096ae21fd98e16ef05bcf267466f" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.down_proj.weight", "shape": [ 5120, 13824 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f7e2c33a0ec9c9f509b5a4d8d41b26ec" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 1557135360, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1557135360, "byteOffset": 0 } ], "md5sum": "b61f817b65ef3a1d1028e40275172163" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 1681408, "records": [ { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 14336 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 24576 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 34816 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 49152 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 59392 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 69632 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 83968 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 94208 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 104448 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 118784 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 129024 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 139264 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 153600 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 163840 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 174080 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 188416 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 198656 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 208896 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 223232 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 233472 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 243712 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 258048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 268288 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 278528 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 292864 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 303104 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 313344 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 327680 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 337920 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 348160 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 362496 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 372736 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 382976 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 397312 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 407552 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 417792 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 432128 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 442368 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 452608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 466944 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 477184 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 487424 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 501760 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 512000 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 522240 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 536576 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 546816 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 557056 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 571392 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 581632 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 591872 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 606208 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 616448 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 626688 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 641024 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 651264 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 661504 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 675840 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 686080 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 696320 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 710656 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 720896 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 731136 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 745472 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 755712 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 765952 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 780288 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 790528 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 800768 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 815104 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 825344 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 835584 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 849920 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 860160 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 870400 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 884736 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 894976 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 905216 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 919552 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 929792 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 940032 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 954368 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 964608 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 974848 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 989184 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 999424 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1009664 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1024000 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1034240 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1044480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1058816 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1069056 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1079296 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1093632 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1103872 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1114112 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1128448 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1138688 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1148928 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1163264 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1173504 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1183744 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1198080 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1208320 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1218560 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1232896 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1243136 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1253376 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1267712 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1277952 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1288192 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1302528 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1312768 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1323008 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1337344 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1347584 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1357824 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1372160 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1382400 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1392640 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1406976 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1417216 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1427456 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1441792 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1452032 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1462272 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1476608 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1486848 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1497088 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1511424 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1521664 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1531904 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1546240 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1556480 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1566720 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1581056 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1591296 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1601536 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1615872 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1626112 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 1636352 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1650688 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1660928 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 1671168 } ], "md5sum": "94e49e672e625292e007ba7d61d222ff" } ] }