{ "metadata": { "ParamSize": 709, "ParamBytes": 20481200128.0, "BitsPerParam": 5.000922334820072 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "a5d3556afbf5fa2be30ba135893b2676" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "bddafd118b35c61adfdc893d17305824" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 20658176, "records": [ { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14336 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 18364416 } ], "md5sum": "19b135a2eb2fc9e75d27e9bbbf8222be" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "96d61b6c6ff7eed03c0ec4c06af07b51" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e102203a2283d305c21639d73e71d5ec" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "137d40dd6f3737c2f568c96e734cfdf8" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "9b66adfd9fc06e9a60ca90c405e3b17c" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2da0b79e411092e6d5f84cc34475b6ff" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d9e2025aab4b50028a62018aad13e522" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "670e73698a4bcd95406832720b1107a1" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "894bd9def86ba5351a39a0b86d426a83" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "450d8eecd0b1c197901958fb8bb9ac83" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d025e698135c43d2ec55c1a519c31cb5" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "1d05f468c62b8b67858466bb71fe0f16" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "1824fac088ce6e4cb8a431e6f6824a64" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "95be36fc27e2af5072f34ffdc88b50fd" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c70805174ae2c441aa87f0c447ccfc7" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "9a6b806376439ebc53754af16bc4af01" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "78ec449cbff1bb0c519719a128cadf13" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "63ba4206daf94c714bad8c134b87f2c1" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cce4f63aba4b501535bb50c941ad0256" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "5a1b8cb3489a5d22ede126946bfbc1ad" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "0fb98aa8df1d6ccb1b371d01e6c9ea0d" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fc30e8c13cf738369279b67126432fd8" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a58e650b8d6fb216fb6ef26a7a17b454" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "61d178956ae74f905644aa5b555e1cee" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "ed4957794b3adf02335125f4f949210a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4d008331d44d3491defce907cb4216a4" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a288322ad20835b49e60aabbe2d5f005" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "2bb19542ece2f2f5d36322696040c13d" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "6eef4376e35236e62e9c4ed19e61cccf" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "020f73c90fed27f64f92a04c3ad7c604" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0537d151962b63b1a9155689f71ec607" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "fcdeb5497cc9615e6d0408fe473d9ce0" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "54d43f3df746cd1a724666239129a18d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "41790ff97c4fb2b3f66ded81d692a3ea" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3ad1843a21e24195895ff3e6e8db01f4" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "f76b43fbfea3d75c090cdf9f105c62b8" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "3cbd6ffa460215417908745c6b13ea25" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "912c59fe92c4267ffddb20ce25919179" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9852b7516b561dbd7fb8865328aa9be0" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "e3a8e18066f09a6eeaecbf0055fe02da" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "5843c4ef81f00e967a891cf030f156f5" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cdc786bbc8ac85c5dbf516af6d985379" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "15fdc367a5c861a466216ee604d108e4" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "8eee545b4355ad5e23397ed46ff954b1" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7d7e4c354a8408e18bcb5376e015e6d2" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fbf236e88bc15238e5b8d85fccff18cd" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fc13cc1718c7cbd480765f0c68fe1bdb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "ca9da765e9c61456c3c99a03d2adab60" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "40c2c199df2bafd7e5013a9d7f0b2768" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "43ebd90127e550305d0fb5fd68d6c28a" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "69d02941d620f906df4e74a4909e6144" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "96e9da15f45b85eb89177ed947f38142" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7b93708d4243232f1dd6434eb87d79ef" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dedb15a35080aba3a55342e9066d9319" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "264527d6572e190b42f5ff7a1d1ffd98" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "5d58f5f4f467027a250ebede065c6d3c" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "fad56ee36f05261e7d8164ce1dac2f5e" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ca82f259eaacbabb76218fff57afbbef" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bfd0013ccb55775edcca13c49e874992" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "140e46d472a55960ef02fb13306c0cf3" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "699802b56968d7c268fbde7cdd7b8880" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "049015ddb63b7b9a7f939a6be8e276be" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a6eb79588ba89a69f8ee037e81857a37" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "33dc46946da56d6401e201f47f5aaa4d" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "e56cc62d757c3035b98b82a077a4fc7f" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9d951a658d29edac6d58b7abd2b1d303" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5368cdb100d69f3e39059a4c74b79a7a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "0f81a597c68d3787141411f38f14a510" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "74730724666239da8d66fc69ee951bdc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c9d556d84fff39b2157e42fb667e372c" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a536a22ea9095c94383e5f228acadd51" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "8c064fd2dbc57c49c121988504bc9aa2" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "a9c7581a7393c2c40a052933c45744b2" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c2d1a4e57274259f4bedabdd25508d53" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "76ae742e6a343339da20c923db42cc7e" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "dee16861587bba6f8652f9eedaa61c93" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "55513331194fdc67f22b9bc479b85f0a" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ba873eb1b7f3b6a31328a7a9e3b950a3" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "68a7bcdd600074debd8f22b7b34b0062" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "218e66fb190d5f7334deed66da8811e2" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7ce627eca84d769b856f4b12d196c909" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bc7145d5904e1570d39ecf205d9afd89" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0a34b01b05830af72b0c568d47e836c4" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "e3d5a64b41fbc416ef19e676d765ee4e" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "8735d172684d7c48263cdcda7fba79fd" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "86eda476773b957e344a409f375541f7" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1c7c1c5f1120b75049946cd39e1597e9" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "1edc28f3f3ef940cd1d9c9c59fa25f65" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "168974eae400fab555641d1aa2ef5bd6" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a0fb3e421bd805b3891d2b1a01ca0628" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d19026ce054dba56d76d7ce23bb5f062" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "96dbae919b4c1c31035cc128b1befab7" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7c9d064e9a59d59b3a9917d366b03171" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "17ac8e3f3cb20725188ffa9202692f6b" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8c5645ad43bf5864a20bd160066f47d1" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "684ba24a4d1df17f0b8a6d31a4cc592a" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "ee6d4c1abd0e1e5c6da64c27f9fbb476" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "01080c3099d51bbb061f878d693b1455" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "22c4522d55d3ba75da4fe61289ea5a08" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "1913e9b87cb7dd69097a2702e81f9b11" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7025e9831d82de8c8a2ce645793821dd" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "de9b75531929690e894bb1bdfb2ef97c" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8dca9a4901b30a6714a667990f7837d7" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "9e50ffaeb882f1077db4eec3891a6113" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "705da59b51915b542edafa8003c4700a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "06bab1fd4acda03e0e7a0087e94062cb" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4571b2586d5a793d09de3a9d5979b63a" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "f3068f56cec0b3d716a60c180e84485b" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "cb33d0b6528d47f65e7d0be4b6f79cfc" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5dc6b14108e49e4de9b758e7c58defb7" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ab4d214ebc3a59b454543a84b45841cd" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "8ef0085c4f981a806092de6349e09afe" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "55d6d2b27510aba58ce528e5ad31bf37" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1d82218603869d6d45991a0fce328ec3" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b4c7025553f3e53d015fd87153e9e62f" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "3ba369687c5c8c4481066f3dac2cd08b" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "948df02988a0d19a7f54986651829048" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f93a58cb9c20f7d021390116f05ae9cd" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a01e4cea537f55d5a27b200d2f74e7a6" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "2e6852e453456f8be5d35bcf5246c1ea" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "6f7f3f3ba57a09d69ea385c358755dcf" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c3cce73c0044e27aa85e30c0ed0a235f" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f63eac081500f2adf4bbf8b76bbf4c10" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "ee0b78cd4742e3a14ec654ddebb0c8cd" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "a7d808207e9992255f5c3e9e3eb5e7fa" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "897e9088b50e7df2dcfaf535755a7df6" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4133fbeaddc366f4eb53a5f9f958c59c" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "a693790bf96fcb6a443b118188b2fab8" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "f1897ee6dc2bbffd32762a9573967ef2" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e41ad6a243695ab694829e42a5aaa0da" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "44a01da91b46dfac179afb3875a00bfb" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "d9e05b0db2fe692bf577150972ca1b46" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "d691d31a6b8b3f56fea92fd35d7257ab" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fd114b52335d726e693ba042cf18d760" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "35a3a39d22cbca791eefadc02850fb54" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "c9603929fccd321415feecb7a3d39aff" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "dc53aaf9e0dc6f0aa533e6378f19a2f7" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "962ac5b9fadb98a80d8ac08e585eabbd" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "76c991101adb7674927a925091a2b782" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "2bac3d9eff1f2b4640222ed88eb556d6" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "e9104e8e4d223f898eaa527e237fa814" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "825327b0c3fe5b22a2a089945a2b13be" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "eb914ffa9e97d8fc380abeb7571f25ff" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "62a7d34aa6bcb699d5ff34dde670b252" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "5e2e1d5ee46ab6e7b1d01a935cc434b1" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ccc6c7f43bb43d3c11598e2ad335bab3" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2c039f2f46558c83945571a5d8906733" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "94667f75873c81ba0ea41ebf3996061c" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "fcce211afc88cf0bfec729e3cf43bf5a" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d93b539242e2e35021792a9c7a9eb421" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8cc334c3d0fd4e8081953f17af91e9b2" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "aeeae62fc71cd93870a8ebfef2c3651a" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "82dbc785f53946f30ab25a9312771c08" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "661dc4c8725b5cf23d91fcac3a79770a" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e7613be59ad90a50b58d4573950e1425" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "b3fc63d69813f37d0409d1caa968a3a2" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "aef64a9487d469b087fa749b17f51fe1" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1ae02bddb160db37712c6ea66c6a2804" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1e05eae54a6746dffd004b79fd229d54" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "0550c1fffee045b3aba22666f4d4f065" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "24f5ead4840e936bbee0625244a8a1cd" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "902bf1082271f405dd765d01a6e371df" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "74d570aa041b6b192f01b1d746bbbde0" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "4b5532357829abe108014d35ffdda2a3" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "790b440fadda515e72e7468812640bf2" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e6b52f1acec96b951a6c1fbad03f3836" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9f6826986ab773642c95ae0727a238a1" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "72918479845cabd9ffa18596933997d3" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "d3871774ecc0334832ed730a3b803a92" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5ac7b7b56162e160cc7fbed895453641" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "df4aa0c2c90330291d867080848087f1" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "843c606092d20d006e237743e1b739dd" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "4118d86c539f100ca839c4a74e4ea4e7" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "451e73739a5ed60e01b7d5b553b32feb" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f9462c6abbcd009163cbf6374cf2a6ce" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "16ee877a871f5623078c6995f5bf292e" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "81ee6ea436c74e926e878fe748c5b879" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "677d4dd18121d6c1384942cd5d0d673f" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6e33e4f3fdb5ccf53d52f2fdf9a5e78f" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "614c99c51f2c784bd2d9384a17b64f99" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "2d52e0fff04b1a2cf7d0d9c1e9557c59" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "8bd4d30299375dde2c9f29e55043c949" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fac4acc8f18ecfa2a6e34cf89fcb499b" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "3fc58a97f6b5c1684d04182538e6cc4a" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "e8e12302324a85c6cbb293998057080c" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "34cfd38623478dfcc1c6094e1fb859d2" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "62e122e98cd6bfdc8885ff6bddff1b0d" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "edb68a54c9a80e48400abfdd44a3b901" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "7be5bff882acdfe8c31bfea11c85b728" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e7a561097d43b054dd924aecbb7e343e" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "210a9080788a15dd27871702d584d9ae" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "bf2e6a4447b8ec22d517bc6a2c98935e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "91e87c91000cb343cf97335234778e89" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "47ff16ca99b30631b19038815b30df1a" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "38a9f63418bf283f40bc53cf7f52c677" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "2bf46302659f98a9c4e73603b0d6cb3c" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "e487cb58f97f0a467e6eb97524a28330" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "af3633b85dc2252e33357ee3a142afc0" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c323236c7d5ad90d938ec35d7380fb05" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "409f66d2c1eeb2b949360d040778e702" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "fc1311fadb535c9c3fedca2a6890738a" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "eac2ed57c9f5358dcc0d21b1c9efd8ce" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "29b3edf73bc7cc03db7d2dc8ea51dba4" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "da791060584b4716257f7375f4590ade" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "3a4071691b01dbd696bd14ef8238c705" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "68f5c10d8b1c5957fd340a285f48596a" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cdacb7674985f29c7ca6531013533397" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "d7551478b67f2887da11d67e1b20458f" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "54f7bc560548972b35f9dde8dd422cbb" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ae9ea3d6e6c71851067d17709d1bae70" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f55b48e6466d6c36d75c6131a081d90b" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "cc538747c1184efbee56864cadc86e89" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "c4eda8ac90b7eb8d82ba1102f7b60d4b" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "86692eabd67ea856a536849544c63548" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "34f7c76b2b978a3aef45b9002e859811" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "44af0c7745184eee20b6ed83bb5b8a07" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "af57982b4f1688b3ade911288382fdc1" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3b8f14132a2e28a95810e0afcdc3abc7" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "24c2c4fab7e84045e0e74d5a3b417424" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "95098a6acf673ecc3aeec1caa4cae215" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "30fcd62f7301c9e64511e835dc76244c" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f57b06c96b3972c5c856d7749bc7dcc7" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "16a9a10c4a39e534f43fbf65eb85a630" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "544d25b93d84f058f17c7ce4f2ef7800" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "2d98ff2a2723ad0bfbe072ae977f904d" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "945b5232a36c056e0db5f14fb594f440" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f9889a8f3f5efa3134a4ea619e6685f0" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "549eaaea20964eff92cf341f352cdadc" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "3d18a3f5b7e0b20cfa76eb0324876a92" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "269eb10ca8b5772ce5968d5ed029581a" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e2e52dca6d1ad243420fe6a6d8699674" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "323ac3fcc94fc1934281230094c03901" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "2f5cfeb4aabde62f0e4b53f731c8791c" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "4168839b89b60ab8c8ad6c3d346ccc19" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e821edec5a7a89a983d06f63f32bc728" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "cc15b783ccafe63a3228ae0602699852" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "85a4c7747da90ca4a44916239bbf8434" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "dda52034a4ea687ec109ae909734ca2d" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "21ed06aed4398c4f1c92eadbf896e416" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "4b444b1fbcd358fc60acdc117674cfc9" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "fcefca8c351dfad6e373916cb69216b8" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "00a0751370f3a8ce196ff1082b052455" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "659600488165359cbdf8c3d1609ca439" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "10a2ce38adf3324cf4bf46db38cee732" }, { "dataPath": "params_shard_246.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "3499d5386f61358d3bc4a6e2721f7c7f" }, { "dataPath": "params_shard_247.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "746fb385b3863da326ff76b8c23f44ee" }, { "dataPath": "params_shard_248.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "10b7036f961653e8110430c4e781fb72" }, { "dataPath": "params_shard_249.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "5636ed4aa6a3d2f59940f33911258b43" }, { "dataPath": "params_shard_250.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "58c577d390e62c3fd306fdfc8efa2b1b" }, { "dataPath": "params_shard_251.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d9237342ed10fb225dea406295cd192d" }, { "dataPath": "params_shard_252.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c251fec419e1720f311df06e60cf1877" }, { "dataPath": "params_shard_253.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "9f4bf2c1fcdf7514c83293bc9e1a6c1f" }, { "dataPath": "params_shard_254.bin", "format": "raw-shard", "nbytes": 29526016, "records": [ { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8867840 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8882176 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 2293760, "byteOffset": 27232256 } ], "md5sum": "e516d3cb8ba315555ad5d2eb418e8f68" }, { "dataPath": "params_shard_255.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 55296, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7a53f632cd0ab8b2ec1b0bb402683706" }, { "dataPath": "params_shard_256.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 5120, 3456 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8f32df5c5ce254ff05013edd5a52fe5d" }, { "dataPath": "params_shard_257.bin", "format": "raw-shard", "nbytes": 32440320, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 55296, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 17694720, "byteOffset": 14745600 } ], "md5sum": "9a2777c1a378b3f9fcd4a7594d72e8dc" }, { "dataPath": "params_shard_258.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "5621516379304600cc71159de28b8bc3" }, { "dataPath": "params_shard_259.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "d9ecc5cc7681d72b16e032a7b83849b2" }, { "dataPath": "params_shard_260.bin", "format": "raw-shard", "nbytes": 8878080, "records": [ { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 5120, 864 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 8847360, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8847360 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8857600 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float32", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 } ], "md5sum": "05481db171508742fb07f2572d1a3e13" } ] }