{ "metadata": { "ParamSize": 515, "ParamBytes": 8293965824.0, "BitsPerParam": 15.999992098355717 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "d6b7f632d45a6671e86edd191d6060ea" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.19.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "0f961749ae3cc72efe4c3056fe9c4dab" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.19.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "ec3abe22011760ea4767a4e5e8d0138f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 8192 }, { "name": "model.layers.19.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14680064 } ], "md5sum": "e63ea55efadf77a03c81a55ebc2ba97e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.20.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "58d526fabbdeeb8027df5f3d500c04ec" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.20.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "2ea348a30bffafbe6099dc7c574d6cf3" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14680064 }, { "name": "model.layers.20.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14688256 } ], "md5sum": "8f82e4888dd805289a34ce62ebbc1d7b" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.20.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "259eedde244f1f585d8feec5461535a6" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.20.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.20.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "80a61fcefc0b3013c1efb09375785027" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.20.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.20.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "8062d9a37b4a43152790d0836835f7ec" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.20.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.20.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "8667c63a7952d25b2249f16a5076c4d3" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.20.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "f52c422328d9d34452a4f8dc0c382a3d" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.21.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "31334e2026a4aa5e46a37f0e6400ee2a" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.21.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "05d474be9356231f981debdae5620c90" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.21.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "803f9d6457080df171eacbcf22f6f4e0" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.21.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "4ac74ecdddd362924e829b1554b2705f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.21.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.21.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "3d2f94a7671218060983d5324f229c41" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.21.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.21.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "e497bce19a56db507004afecba930c47" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.21.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.21.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "4e504b94650aed9df9f85e863b415fd5" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.22.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "ffd789b89156cb57f684e882826bf801" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.22.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "e0ac35e1f81182659bcf2c372a0a25f5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.21.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.22.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "b6779177b8deb0578e1f8a1e17a17c95" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.22.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "db1eceb0050b4fd04b019f7acc9acecb" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.22.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.22.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "6c3cda19d7a3854c3cd05f9de628a0e1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.22.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.22.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "f9a74a3b6f5f0ce55572db1ebddabf2f" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.22.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.22.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "14ccf686650953f0ce9196de93b5945f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.22.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "e0cf61542c7a183104021f506f0b8c14" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.23.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "7566e569715d983fab71dc70b4f4680f" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.23.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "0df508b3537589d795ebbd0ccca833c5" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.23.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "7b2573494795b03d4b7a673e58fe4138" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.23.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "141d5e1a2762672f3278b7bbf7fc0c3b" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.23.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.23.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "60df112fe37c044218200e29c535710f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.23.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.23.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "8a220ad3e8a443e16ff11853b61c4d56" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.23.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.23.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "929648ede7c3347aa223ee696910845f" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.24.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "8856b12408dc6f6aca8ebaaf986bc9e5" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.24.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "ae0ddb874642128fd46603a3a163ce0d" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.23.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.24.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "0f1e5793b286adfbaf40f440d52cf1b1" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.24.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "90f8cd675a9530bb60893594e34ac9f2" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.24.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.24.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "a08beac106e9ffde05c088c704b1c2df" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.24.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.24.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "da41a283a4813a313b57a0604969f681" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.24.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.24.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "9fc73dea0348886673b7d63275b34138" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.24.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "bb088ab30beda14832861c35d2317d8d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.25.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "08a7fa6cafca921cf353d55c6cae55ed" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.25.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "dbb4b1964e6d07d96da76c33ed818466" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.25.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "7e7bf6e04448f3f2c43a225ef715477f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.25.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.25.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "99451f17aa62ea4dc1b909376e7c83de" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.25.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.25.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "1be740db12b41a72194ff9cc2550db09" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.25.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.25.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "4f674d36b67c6395d43013abe29076a7" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.25.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.25.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "9573943b5ae76023f0047d97a17d3762" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.26.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "cdfd82e06cdbec1d9f05fecf2ce9110d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.26.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "01279d85f16da93cc1130895dfe6dfc6" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.25.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.26.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "6491f1a97ddb793e10777ac8bd7a3e31" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.26.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "6ef38dd168a8900804021955fa275454" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.26.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.26.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "b535994b0b4afc10e69fdcf9ef845f12" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.26.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.26.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "6f92f57d9b799f3755886ae726baea15" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.26.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.26.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "14d52c5e824138429b4c42fbdf3653ac" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.26.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "53c0eda02f828e3723106a31eb3c40ef" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.27.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "cc5b04f630645627783a25484521d365" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.27.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "3f2c84ec3eebb178e0a687425fd2c610" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.27.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "58163e6773126a1bf0ff22f272c3fee6" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.27.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.27.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "fd6b5f9149f4d87bbdf500de9dc46f86" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.27.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.27.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "8f7bf3ba04f6eace33e8346097e4859f" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.27.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.27.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "149aba69dca61ea06248260d63357a6a" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.27.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.27.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "2af24ef1b0ac69118863f6f5d662bec5" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.28.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "a5784b0d2686ac3665c25e5d11b582b6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.28.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "205aee2b35911ed10714174e57cc7a88" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.27.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.28.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "a3bfee92ee6d81752f283d88bbb3d25b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.28.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "594ffe8127e65ad0c6edac084e1d5c22" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.28.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.28.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "d00866626dd47fff3a2c06868b871ff8" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.28.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.28.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "95e7c4446d3b8786cce90273930dddf2" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.28.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.28.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "337f017260139678111784cadde067c6" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.28.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "e15f4254f30f735c2487c000c0ce5c72" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.29.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "eed512795c28e63176b4be49ae0ed5f8" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.29.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "d1117126f0e4466b9425c9ea7d3cb073" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.29.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "e327f6067dfad0ff49e2efa9666b7ee3" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.29.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.29.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "6fd6afbd3a82334e13e12fbafa8f1d53" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.29.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.29.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "2a1383448643ab67e4800fbeda1cfbd2" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.29.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.29.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "7399d04cf8f042791457e2005fca86e5" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.29.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.29.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "f9217d4aa7dc73842ad45ddfe8f79447" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.30.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "46af4ed6b69732f248403218b423dd79" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.30.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "13d87574552f05743d9483b3de35060e" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.29.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.30.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "659072a361cc9a52a53b14d9f7813a2d" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.30.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "dc9ef725e6b57a9c0d60761f06ebad56" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.30.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.30.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "1ef8659f01ba5e7f96ae4df9a3d17d99" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.30.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.30.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "f3cda9e254a9d768c5cca698e764055b" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.30.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.30.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "50f6ff665fff078221aeecaec3a3b328" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.30.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "b848b5a035b8898c5d1b43210b0602b1" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.31.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "32f7cd62b42ad7f4d6849118912dba5f" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.31.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "0bcc0274aeb7b5dee8ad67ab3ae756fc" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.31.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "dff7319abfe16c9eecb66baec49bcbfb" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.31.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.31.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "5b509f2c500c7d0e0ccf105c3a61cef1" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.31.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.31.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "6fb5dacf8315c510a2b2b74089d9b06b" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.31.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.31.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "f34cf975319908e86b55653ba2ad70b1" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.31.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.31.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "a5c91c20793d04892943e69a669fef76" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "f1f601338a702fe42b2da79aa982284a" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.0.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "6b36c570d559a6919b25edf17a10b224" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.0.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "76e9a7e135b5bad11cd04a67b6eb124d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 24748032, "records": [ { "name": "model.layers.31.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10067968 }, { "name": "model.layers.0.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10076160 } ], "md5sum": "da0686342dcd32f355ebd03c783f1268" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.0.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "fbb90114a41fe6a2fcd2a6a280dcbc72" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.0.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.0.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "b8d9642c83cc9ff727def507ed9c5673" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.0.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.0.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "c43c1442512b2af8bde1c79aeaf4262a" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.0.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.0.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "b835a1f7e253668766e340d5e42d36de" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.0.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "2df1e5564df1e24bc1820fb393a33f4a" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.1.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "55182bbbd6e58d778bff04c3209bdfd2" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.1.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "e447267f1e42be3b800c168b8e5d7225" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.1.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "832071c44569a83bf6461b13732c7742" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.1.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "b0d71e233c62f544dcc6144410c00f43" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.1.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.1.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "c9f4d2265207c551143f067fe83fd945" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.1.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.1.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "e8246e193e3bf45011650689cce668cc" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.1.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.1.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "54049c000e2ed008c8f61b6ef835abe7" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.10.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "88271f8b06c04c7ca8e57c114c56c91d" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.10.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "467e3d6f9fc58f800a2cad75f2e7b079" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.1.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.10.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "1dd9ddd4a89e2b06b67b3adc7e13df44" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.10.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "f195a02c175ce5b1bbfaba4c807817e1" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.10.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.10.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "1b51449216213f4c849aa11e6c8085e0" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.10.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.10.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "d27642093937d9a2695bead12ed3bf40" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.10.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.10.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "59d4a11dfdf2c69a174266e9c40b2fb0" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.10.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "472d4200000a6804c4922c82f002f7a0" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.11.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "19f8d8b8356c72baeabdba292adfd968" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.11.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "0802acbf52c84a2d435f1d985c8a82a5" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.11.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "026f4e844678330169ad93d519bbe40d" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.11.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.11.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "2d7f256d777c4d668c56ebd76304168b" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.11.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.11.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "5a6dbf7bc280572b4e4534571c0be25b" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.11.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.11.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "fd5a1e15994a694516a50ad4e84c238e" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.11.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.11.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "2f402e50aed5a4f67e51562bf1380514" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.12.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "1d13ed0db8ca22291ae540325d0a0d20" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.12.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "597fc148055d3ede3c44f01d4a55ca79" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.11.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.12.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "7621844498c1ae2353434216ffce5c07" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.12.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "5591493c59aeba0a4c097223c0f8a026" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.12.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.12.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "127eeca1bf279bc92b8249ab4bbc36e7" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.12.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.12.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "839500a9c2001677d8e1c91dc2775248" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.12.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.12.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "aa8dd0bff36003d86fc1223db3a1719c" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.12.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "fd4944155cf85ae0aa2cc5eb89f9a61a" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.13.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "c8346d670490a78b801b6622baeeb40e" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.13.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "b1254cbbf54727990cbaf635b72e4215" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.13.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "1bcbc0fa0aa6abc899badca950efc15b" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.13.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.13.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "2c98814b5d136598c4c8829610cd8454" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.13.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.13.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "879e91a09341fdfaba650df523c708d0" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.13.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.13.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "0be0ae5760f248aaa2258aae69378c3f" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.13.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.13.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "cbdbb2f89d3475a0b165cfd61cba92d8" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.14.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "4a82cad39441a2a0214359b6b84de6f5" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.14.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "9ad22d507f7cbd8ea84b395108f59c43" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.13.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.14.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "bd33aaf391effde6d75834d9c78411c9" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.14.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "4a1a52a37d35b979de396372f2278691" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.14.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.14.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "410293ddd89b172a110decc675ddff1e" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.14.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.14.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "33b76aa1f4c43ffe9ce9b1c9b78eddaf" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.14.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.14.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "9fd4a7759018782c07f6f5639d8402ea" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.14.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "7d1c0605e0c6903df16dc8953f07a872" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.15.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "be9a8827829fed7726fd849e463193f0" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.15.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "f83445debb2c8625d5ec6c4a2dd1abd1" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.15.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "5dc27d9cbf3777b1ea1321033c60a82b" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.15.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.15.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "c32ace714c5f72d97f753f269b5c44b3" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.15.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.15.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "8b4cb6b3241b50da70df3863bc19ec1c" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.15.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.15.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "3f959901770611111ceec4a04f8f0c2f" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.15.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.15.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "fd8ecd6a45ad8bafe7c4069cb555ab8e" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.16.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "064eb1670c8089a1e92ed17ec70c52f5" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.16.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "48494274a081f287b245897350d9da9e" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.15.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.16.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "80c8e487690303bac428354d7f128d91" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.16.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "746468dad33708404785f7a1e3cff2cc" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.16.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.16.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "93cd5d3fc2a0586addf3a246257196e5" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.16.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.16.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "2875a72c8ce8094cb782516a3acc3b6a" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.16.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.16.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "96e55e519051742af15aa54d3b9c268b" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.16.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "3b20bf00a9ee2a210b131242f6834b03" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.17.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "5cd6541bf3edaf7a49380fc4264f675b" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.17.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "a59bab871e2aaf0254d0299e9c6dca91" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.17.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "9182217b00928c7af62ea5f15603d4f4" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.17.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "ae7aa74e10ada3acdd55be63511717cf" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.17.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.17.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "8c14354c4caae3f6f872de13e66882c6" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.17.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.17.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "c7cdfa4671b8dae084b7f52ce26a6d54" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.17.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.17.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "d3bf4685330db0f34355b820067b47cf" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.18.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "64ebeb24545d103a552ebc9fa5f6042c" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.18.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "8f18000cfeafc2b483ebdedb7bc093a3" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.17.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.18.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "03c04510943937836cf8c41d72548354" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.18.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "d3cc87945d93ac355bde1b7b4eb4a847" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.18.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.18.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "fabeb2599962567fd7fa7296da5a1439" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.18.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "495bbe5f87412b261031d7bff34aabce" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.18.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "337cebef2d5d217b3fa13f63ca0f924b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.19.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "c963ace614ae35c254182f40df6d07fd" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.18.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.19.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "3ffcb3178015cf686a35a246599cfb35" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.19.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.19.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "3a1f7a194d6106b7857a144fffa5b73b" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.19.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.19.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "9c9f9b3b2e56c5f0a4d45a202261f39e" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.2.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "7d7e2f11799c279e79623bf601f907ca" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.2.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "fcb1de8d764a0dd6ca93b21f4b42f260" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.19.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.2.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "644dd378564000e7d74966d26b79286b" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.2.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "dff70980b0e519e0c6fdc5b2bef90694" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.2.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.2.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "c4ee5232fdc5ef7cfba528cb6f1f2e36" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.2.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.2.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "f4fe097b322d4adc2939be5f29473bf0" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.2.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.2.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "c42b940743c532d326f591ff9d9d826e" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.2.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "d862464193f991da75cad53c46cbf3b4" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.3.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "1de374fc17adaa6ecbbc9f18cf7ab95c" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.3.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "777540e1ad97de2cf9076317a8b1f81c" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.3.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "8a6136f943c1c9d28fd36f2f61cc0f02" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.3.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "69ec5b0ec486ebdf78709d701ba26e9b" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.3.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.3.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "634063f26da7d99e825ece0737c87ac1" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.3.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.3.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "cff7f016c0d7753098a6afef708f629a" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.3.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.3.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "e4d337be3738eb97c8107a2205308528" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.4.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "6fe7f24abcd399162552c9ae0b40897e" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.4.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "bdb3c8708ab2833d098537e4e84b3a53" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.3.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.4.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "df6681eeef067454eba0701527a9ec7e" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.4.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "b6b3a0313962ba2c2191fc015c41f7f3" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.4.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.4.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "cfe61fdf731e0632c018bb4318bb501b" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.4.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.4.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "455ab698ed51f4bfb3bee95367f49ea8" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.4.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.4.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "e52e2c0b6595da573628047d270f32b7" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.4.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "da8a48cd09edc27eb06f45a79c5bb9c0" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.5.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "57bde6aee14add8527dbe6c3fc2c6220" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.5.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "f9fedc04aa8ca5bf2604b4a93182c15f" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.5.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "1edd63de79179bd03c4321ece7fecc04" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.5.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "75a82c438c72706c0aad6db83b3d2347" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.5.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.5.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "4232f87b3ce35313c23664b3a8a228d6" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.5.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.5.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "833c8b03be22e53ef6cc268468cdab8b" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.5.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.5.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "beb777a234a4f71877e206b322938463" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.6.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "e1ecd2a3f261b92a75fe46d89cd12eec" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.6.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "9a0cc859ee3dd05d1412a25aa7ae4d78" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.5.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.6.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "2826061a21566902cc1d2e71428a9848" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.6.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "893413d9760150721fd8b36dfffcdbac" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.6.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.6.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "ccdad895b6ababbf50177ff7b852c538" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.6.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.6.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "98824e731eee9883cf62301339cdc09d" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.6.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.6.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "aee2c26ccd06640f04d5d17fca556fbd" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.6.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "4f208856f93a2a47bce35dc3474da92b" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.7.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "822958bbdba15f2701a4d0bd7257f3f4" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.7.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "9e82af14b08d8bf36d663ce22a821310" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.7.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "636dae948c491f21be9fdf43866cd296" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.7.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "61a40520726999b311f1525765c79311" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.7.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.7.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "74febf331ac30c7d64306ff99d334ff9" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.7.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.7.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "20c8f488e49c6e91beed602e4c684c48" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.7.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.7.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "50bdb812a9af994f23c7407d9f2a17de" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.8.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "ecd1bf4c95c40955570cfaddf3cac8d3" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.8.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "7868d8eb9584c9a45e9b2d8617073530" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.7.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10059776 }, { "name": "model.layers.8.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 10067968 } ], "md5sum": "3a50e0635e8da3f701a09ab0756880d4" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.8.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "bb4729013290ae7accc88eb75a292b54" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 29351936, "records": [ { "name": "model.layers.8.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.8.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 29343744 } ], "md5sum": "5678b66ee03348befe2c6a586797aa9c" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.8.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.8.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "edb17374771b81ee8826e4afb47f4309" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.8.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.8.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "ad9b9812e38650ddc5f9cdeddcc95aef" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 20127744, "records": [ { "name": "model.layers.8.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20119552 } ], "md5sum": "9103290b7470c65364aa92f6623c25af" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.9.mlp.down_v_proj.weight", "shape": [ 1791, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "8203bf7dd25b8f97de23805a639b6849" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.9.mlp.gate_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "dd57f6f4b44370def4bf5248d235e324" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 39430656, "records": [ { "name": "model.layers.9.mlp.up_u_proj.weight", "shape": [ 11008, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 39430656, "byteOffset": 0 } ], "md5sum": "43125790324ae86aede4b83a79f78ab2" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 29343744, "records": [ { "name": "model.layers.9.mlp.down_u_proj.weight", "shape": [ 4096, 1791 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 14671872 } ], "md5sum": "ac6f7d8cc3e4f3ff32ca0d9e4670ac97" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 24739840, "records": [ { "name": "model.layers.9.mlp.up_v_proj.weight", "shape": [ 1791, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14671872, "byteOffset": 0 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 14671872 }, { "name": "model.layers.9.self_attn.k_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 14680064 } ], "md5sum": "f7670c85358d73590da9f5fea5930714" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.9.self_attn.k_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.9.self_attn.o_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "72e419af860a9b0886719c308751f325" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 30179328, "records": [ { "name": "model.layers.9.self_attn.q_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.q_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 10059776 }, { "name": "model.layers.9.self_attn.v_u_proj.weight", "shape": [ 4096, 1228 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 20119552 } ], "md5sum": "a4a0949b2f7b2800da450dbbe8c4fc96" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 10059776, "records": [ { "name": "model.layers.9.self_attn.v_v_proj.weight", "shape": [ 1228, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10059776, "byteOffset": 0 } ], "md5sum": "92ca71a93cc925c0f815e9e2e0137e4b" } ] }