{ "metadata": { "ParamSize": 199, "ParamBytes": 15231233024.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "639fff4937ff77096dba20d893a4bf9b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "cbbc6a07cebe4053a22d356c4d5fa1d4" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8ce8537a836c55d41b2ca2ab682a631f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "500afea2b54ff3aed8ec28a3163c32e0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.1.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "901e57dd1f688086d386b0ab8755b8e6" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "2514ed8e7b2cfd6fcbedac96de4bf374" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "53a13d30b33932f09bddf0ed9c191c2e" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "facd01cc80570a2d1cea0df749702026" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.2.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d49f32dbdb4944dd5851f829da7a454b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4129dbed4ddca89918df795edd58840c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "51891862a1695b8532066c3119e7bf5c" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "632ee44d78fccb210b0a52ff1e9f6f16" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.3.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "dfc05b5bdaf84a836c836d11ded0e9d6" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "6e8baaa2d9c138e8f65d6ed07742d301" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1ee60ab16f88999fffbb900d0c1e5216" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "dbbbfd7e6b6e2833933bcb21ea83014f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.4.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "975c051bf200ce8ab3edffb4ef49b08c" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "306f1df9d2bb44c0d1313c4e83e8596a" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "6f163952301a89a82c8dad50f4466a6e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "dc5f395ed48d3f0a1ce06a2a62071b8b" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.5.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "003ca818a8613760f8241198e08b3f58" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "3f4c241cf0a39241b6c6607d76cc12f4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d7ee3bff79f87e7562f2844e716e4d6a" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8c11499c8d6504291103c0da619c2a19" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.6.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "aa423bef77f5cdd74e4c500742c133fa" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "ad7ee0c3c0f73e4e259c636d09543ed9" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "4a44837f2bfe45ae33073326e1ff61b2" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "68677e982dc93866e7cbd21d266ec94f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.7.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "4ecc280eb83ff0de0a852086068c53a2" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d8d33410d4539bc6ee3185ec4c4a334c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "5415b2dc3ffb018ac27ecf6795c4f0b0" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "b2e5e7b943ea7b6672e6652c887b9b87" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.8.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "e5e3d44904f73b31e4126b77ec113907" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "166f2162eb376dcddca5d44ec0ac9724" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "f517caef320499773309b5a75a8cd199" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "34003c300c554c7344938032cf66ba4f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.9.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ac44dd64b2395242573470bd1b9d9e6c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9b58ba9f6d7a5a092ddb1f0a2c39e071" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "0690e06ff94648eafd457bf5d92f2670" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "0127e25c351cdedfbe656ccaf6a83a27" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.10.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "ec4b0dcdf85093234a621fc83555a7dc" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9f9682646413c4562adf1d97826c504b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "d894ba2c65bde16671a4576e7ac55639" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "641cc564a47f51a68f88526c06380129" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.11.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "2fcb4589d486bc58ee0d7e23abf26f59" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "def9199a1d65256e29e41f619270d6d4" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "65b0cea3f58cd025dfa16447cad536a8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "bcf0603cedae747bb3d7cebd0dd10962" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.12.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "3fbf6e45f97c1857086bee2c7d7344c7" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1c65fff32dadb4179b91c5f0b7df0a73" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "9090a2e507fcd8443d50a2abcc5db2e4" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d9a0f1c33663fd621b9cc0d5b5e3581b" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.13.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "674c8a6c400c5c0965d0fd3d7e4ffb7d" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d8683b1042e946b9adf20f596ec05a18" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "8522342e5100c8da96e23864c38f756d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "9e94e16c673e929a934e6ce7bd16ac91" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.14.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "c15a10e7029aed04bdfeff412ec23772" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "278958e258496a675cfef7c6b92ec7b8" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "32f08900bab3f36694e8a67f227a8afe" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "d554bfc29419fdd1e0453bd247b13355" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.15.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "279cc0ddbd6b43a7b29e966a1ea2d508" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "69893637742e12a5fc3998782c239a96" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "dcab5135408d4ab101a75e8193a35faa" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "9bc9ff2455bcf2698e9bbe82ecd9273d" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.16.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "5c4149839d67ed8212f52873d9b98e26" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "8d643dcbd61467d10c4aac022bc836ab" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1fe025843dea4fd28e6530f9e46a8768" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "4c5b4d99df112bd7819a52eceddb23e4" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.17.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "697142268ab00f87b65bcfe3e0a10a8c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "1b2885373bff9b30c2ee95f8873cddc4" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "67ba2ada3a32f0751bb8646add2c7dcc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "46268f19b9d760b3bfe4625c964f2034" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.18.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "653ca9f793f48c5e783799cb8e494359" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "5bd1e1654ca162931fc646eb65e5dfe8" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c19b337596b79398aa8244f58ebed28f" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "2e46f2327818a33aec50a8211ee49ad8" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.19.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "746af0f8230fe218f445dcac573c66f9" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "c7ff6b2b9038b29f484b4f22f136fce9" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "0ce3135aa4710d86160ebb80169069e7" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ecdefcd50ee31da3cba7d243ab1e57a5" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.20.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "63c94ade83ad684b4f33ca88848785b7" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "9080adb31749a9479b0d8eeea75acb50" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "33ce641c752ee9cac3b6c75b8585db06" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "2656e2df638894138023e862eb713712" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.21.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "d4cc67d3b41fcbe375b1fc7fea0b2afb" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "efc2890658d5ae818e8869e58a671b27" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "e6f99774817c150d29d58d6be67cc92e" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "fb0b1b2cd7b7c2d861a6f213fe55ea34" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 33548288, "records": [ { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 9216 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33039360 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33046528 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33053696 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33062912 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33070080 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33077248 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33086464 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33093632 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33100800 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33110016 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33117184 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33124352 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33133568 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33140736 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33147904 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33157120 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33164288 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33171456 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33180672 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33187840 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33195008 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33204224 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33211392 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33218560 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33227776 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33234944 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33242112 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33251328 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33258496 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33265664 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33274880 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33282048 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33289216 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33298432 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33305600 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33312768 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33321984 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33329152 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33336320 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33345536 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33352704 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33359872 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33369088 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33376256 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33383424 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33392640 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33399808 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33406976 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33416192 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33423360 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33430528 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33439744 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33446912 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33454080 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33463296 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33470464 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33477632 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33486848 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33494016 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33501184 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33510400 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33517568 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33524736 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33533952 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33541120 } ], "md5sum": "4e42cd8492b5461758305584714d23bc" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.22.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "12e7d842f41e13e97d019d53ee31dda5" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "442976c32d6a3a081ac3fce9d4f704bd" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.22.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "a9dae54e79cfafbdfa055245f23e96a2" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.23.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "6c8ae6f045a5270f17bf9ba3b761e27f" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.23.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "7554a14ef679d53dcaf1d5a4088b31fc" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "79ffbbece0d1ce43214dee99afe3b59b" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.23.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "cf8da193517e57d8b3b253b5b75aa107" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.24.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "b027228819ab9843e4996fdce16c5989" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.24.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "4472ae0c4b7dec286e8ad205dcc3bd9b" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "1281c293ad83754abf903a934f453671" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.24.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "54431184f25dda0ba9c83f5cf9d7b963" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.25.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "f1981ecc6e302ec1dab92b94fbf7931e" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.25.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "10c0ab6b8bac2275345f70fc659faca0" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "42cc19b1097ae8558fc007d12a9bd308" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.25.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "8d57dc558c660a70a72cbfeb45fa6eaf" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.26.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "6d0e29bb06b89450ce0e16ded49c8675" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.26.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "d14c19b54df724fa84dcf4f00a68d635" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c538a31fd20ca4a188b54920e01030be" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.26.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "52144e63d33fbb83b71f07531ded1ef5" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 33030144, "records": [ { "name": "model.layers.27.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 0 } ], "md5sum": "5cdeeea2a99992e6142760e7ca300d3c" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 25690112, "records": [ { "name": "model.layers.27.self_attn.o_proj.weight", "shape": [ 3584, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 25690112, "byteOffset": 0 } ], "md5sum": "254dd6a25ae531ba545ad98e058cf98d" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 271581184, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.weight", "shape": [ 37888, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 271581184, "byteOffset": 0 } ], "md5sum": "c53d95dc685275a16be35d4abcfb9697" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 135790592, "records": [ { "name": "model.layers.27.mlp.down_proj.weight", "shape": [ 3584, 18944 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 135790592, "byteOffset": 0 } ], "md5sum": "ce938d992cb98ce810294aa27272fff0" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 1089994752, "records": [ { "name": "lm_head.weight", "shape": [ 152064, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1089994752, "byteOffset": 0 } ], "md5sum": "688822edc27946d29b00439e00e58b2c" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 33178624, "records": [ { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.c_attn.weight", "shape": [ 4608, 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33030144, "byteOffset": 9216 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33039360 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33046528 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33053696 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33062912 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33070080 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33077248 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33086464 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33093632 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33100800 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33110016 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33117184 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33124352 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33133568 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33140736 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 4608 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9216, "byteOffset": 33147904 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33157120 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33164288 }, { "name": "model.norm.weight", "shape": [ 3584 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 7168, "byteOffset": 33171456 } ], "md5sum": "e49dc083a5d8f6a62e9ef5447bb42cc6" } ] }