{ "metadata": { "ParamSize": 533, "ParamBytes": 9234108416.0, "BitsPerParam": 5.001536828453907 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "7932312201ae2761485f7d412eb0dbfe" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "e8920c096ece303715da29d808f834ba" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 20658176, "records": [ { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 14336 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 18364416 } ], "md5sum": "0b571f3cb6e619d96c6d40630c3be1ff" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0873eb6ddb15076ff9e499315425a609" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "538a9140fd4d851564a8e5686c890abd" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b459ee0e7734cf867044df5858581b06" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1a9657c43d2bea8e04a216f3be1b8e0f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1cee758ccca6cbe36975f5901632cb6a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "23967261fbec14b496b7fab724ada7c6" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4747ff8880b109fc7cdac257c72e0909" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a03c85f259f400f3ad626ed448f8de06" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5d1502cee18ca38bf9569a4f2900d035" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7e822acd73e2feb8ca61f8aacdcc9f25" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c172f544d6b820912a5d1279126b07e3" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "ea7119633580bd91b68e726d11ce41bf" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "edc4493565017e3614f855e31a8b9cea" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "e70639e61c5a6f828e70f53c956c7a5b" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "54cce2674637e5a9a9b724d8abd43d5b" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "62990000ac40d33b5e13e80a6794e67d" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9cb8aa1c245fb018feb6cbc935dbfe40" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "87ecc2cdf173992e37c6e22b776da3c1" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8e353bc00035e776d9db63e7d5196bb4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "70bcb66e8cf3b0af54f269770cb4a75f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "25d76912d6a3c86237e5cbc316967078" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "49d47a73555153bb2507b20d9bd04774" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cbdf427b1e0acda5b18d6dcdb636460f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "e44ccd97c2fcb8364ca54290494bff5f" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ced8e045499bec3933817d6953afe681" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "817fa4fc575c020e227a82065a0c0ebb" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c5087115c379bdb08e441defb3c6884c" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8626b889867fe49ec7f6423d10fbea38" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08f7521d12af06f4dc3005b63467301e" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9083a533bca8129b86a454936dd3be31" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "05e0fb5fa6e0a151dbbd29826d43d1a2" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "79999b6e37dfb6ff24960a6ff5eae8f5" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "80e4fc34fb71582c2691088376138e7b" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b95bfd81c4a8d8176d170b1e6034ca7c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "95f356828a6135b35c54fa222f55c305" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "52842279650a298ddeba0d927e2ffbca" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed6f01edc94144612c873b5c04b7ed3e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d488cc2a2c313f52f333c0dc99274ee4" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d16baa55d9af207a1cbfb9976386735d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "89e8cc5017ea54b8dfcb39190473b7b7" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "99a3e961945d71a0cec35bd853b33453" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "9c2be96534fae38bc36c210fa1c79ea9" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "2fe138d29ead13a985595e1bc8feb62e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "3d3a7cb57e3525893704ef5b90577cf6" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aa05a36c803f9c3f170792e037edbdb7" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "30273495e93ff39265054d6e5dea870f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "e74089f716018d9bf10283b8d3ece561" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "16e47105dc1c0e85f0e994ebf39e953b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8eb1ec86ab7eeb2a709f75a0bba77ee0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c0ec0c74c648b0f53a9097b90e280ae4" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "64aa5be14a58a85ed8a1bbe4489274c0" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "20f4b8fa85b48dd5d408c9006ea39730" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "763857f90cc3eab1994051b5d512edde" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b7894130f44658e78d760cd5dc55703d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0831947aac962e67314d37cb09ddec32" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a82e7b0dbb93f2eec513d953d6d64cc3" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f5065c02abe656e842bb387bbf34e03d" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "05e657b42e97ae7ef1bb1fb6e34bf234" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "51f7126252aa56c433965174f6ae8eb1" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a99dc9d0a4f83c597d39f355d211261c" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "467aaf4204905861b799c59e892214c3" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7b8d903b91bbd9922e546bdb5d793b3c" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "af905a61d3e62e1c40d17e9fab5bd16b" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a94f062339b6a79734db0550f046531d" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "12c180ae705fcce191cbaff8f3cfbe4d" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f4e950d83deff4a32cba546438d5929a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "82674b200d22e463869c3c43a39b665e" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "73699ef754e40aa5b8873257abce71bc" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "985b5840b8ce0e375de207b3a5f8c0de" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "1745418f4676400a521012ed8ee6f076" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "33800e5c32e92342b639417acd4dcfb1" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "d8eff008543147da2e0b50398c898e0f" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4be508de3d9252438aba4f647de6264b" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a0da1461b1c30b225d718ba3f0b7af80" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f7d5d7e457dfd1682b881bc8680c3ad6" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1d817f83be19dca2dd0aa5fb9005e0e7" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0fa9918f677c5a00f073d2e86ec8d7b3" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "80c2fad6fa1a0ba5d6066a9e259881f4" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0c6cd38a76fd67e2d7abc5d9b6d2f4cd" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "943bfdca7c057b2f1a7d2686821ad174" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f9a846261b8fc87f398ab0a1cc8fb787" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d618f1cae13906bee204a4da7da60711" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6529c3c0446cb21b0526e3843b4fd948" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "15dbda57f274cf542618cd7d8f2642dd" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "84150ee0028376bdd451fc25900b4185" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "c8b8cea6013b424828512d64b2909012" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b74ca5b23f0fa5238378a71c52495bcb" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8b428d8a7ed1dbbb3bd9f41a7781a026" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "08e69d08505661f7551a8c57d4316820" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "70b07c919d27e7fc5595380be4b158b6" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "51cd251c279e163f9a8b7f64b287ff37" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "bdd83cdfcc8c25d5df7d211b9abd1295" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a4642756149f1b308b86fdd97419214" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d95ba4f66725445a62546074ea53818d" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ff4b728677086705e97e96bdf9b6c29f" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "3d92c34837d04ac9473cd614178dbe9b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ac4877752dfc1a86cbfe31f28bc6fdec" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5788e8512001ff0cd3563119964fe040" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8cd209791b3fe3f4c7fb7c2a9f5bcffe" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1cc83de03adc4be34a9f101d8458bed7" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fd1bbffefbaf25795ef7f378c0ebe4f9" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8b48d937058816c233df2b2cb7e21371" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7cc7456591121088da5eee758d724977" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "fbe3172066f94391ec218094d47effe0" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9118afaa62dcbdcb41c31eadce88a92f" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "bf5757fbe6013d4e2b483c6d9d2042cd" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "23a0b2af24c9ce6480e89d7c4fdc3420" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "7cf3d78301737f206aefa5b477ff8fc3" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "61daafc8f43b59c6d308e2633a0895a6" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2c26d21b1c63f596add98bb5143492c6" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "15ea8d0201aab14cc89e5cb37785f977" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "06cbe75a6c667f5efab284447f13a0e6" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d80b98733c25d4f79cdf2be5f080a042" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "abb4e1d206b66220eed4d8bdc889b08c" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8c8b216208b08dcaf88b2146c28bdb7d" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "6dbe8312f9b5e1f0982e8b892756f968" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "901d320d6c86427c0edf144d3d2a0b5c" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2a24bc0e1dd267b68c7a1fe177d25d53" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3977a531dafa2b60aee26c7b7f5dd9ff" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "9d861f7c2991350e657600955ee7a929" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f79c4877efac93448bbce943e63e14a0" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "507be0ec8d8672267c475fbc32deef1c" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "605496ad25e251edbc9462e980b474a8" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "4a6c3e38d3efc654ab663dd5756ab24c" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "3a3dcfa6f5882671de2bcf6ef7713d15" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "5eee66ca8e0477316ed1c5f2d0514ad4" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ce5ec5b9485fff8f11f13600b17c2fdb" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "da8a6a861570c85ebd946ecb484c31cf" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b9478d35558d6401767c99879841eb01" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "3664490b9465d3195ff797bf2a9cd057" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d8d5493dbbd4f93991a39712e830a5f1" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "ec855fb0d071f621cd9a8cb25a548508" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b8cbdd03ec81e73ebdcade755d835348" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "def6a9c5e8f6dda0a4bee7f77c2a17c3" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "71b3d1f9f01f374a24e8addc59c1e02f" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "7b9fee89096d9c8dd9db27bbf8b9506e" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "df7bb42db4210ca3d9728960aed736b4" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "cf98ffea17c329afdf228eece5813470" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "962959737c5f9b032aa38df86940f7a4" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "d876828df99073e204111cbccccf1817" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e0f66ff197c46575e6ceed2562dac7f4" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2e17099e223959ed75ecfed3e4a20833" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8ac3e43c814a1338d7aae590b2133039" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "a706c2e4e9360bf828d25e5215d0f8c6" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ed6dde365aea49fd12a58526df50e537" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "12046d09e74cf4be566b5ce91f8267d1" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "78ba5b4f37352c7e7367add8a4e04486" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "1a64525c20202bde63132e15e2bad4d1" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "aea9c0bd272b054b31b300d9a1cd80c6" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "8a7b7b3bc7accdc806222461d7931365" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "485e616559fe9c2c444f7cbbe2e66e1c" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "920dfd9b5496e048ea6fd82ce2b8750a" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c595d4e3df3def50b47418cfdff16e26" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "436a74975377a0e16e0efc983cde09de" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0bd1df8b32b7f3ddd019cb2aae21128c" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "15d886d86562b7fe39046afdeabbcd32" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "ad01b50c27ca63ffe7f5f3df9bf8290d" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "edf503612f59edf7d6355eca975b6141" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f5bc34da4a881064e2aff08a2ac7c1f9" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "eaa533f9ce5908ecf60ccd7dc80881d0" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f0fe216d86609c630df4af6de5aa0b6a" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "a8682b6da6ae4bb1b3df5ff71e63c39b" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "f8b230bc58165f171686c2c219e2b827" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "23e05cbb42f9b1c85aff4ff74c929800" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "c17b864491ea836dc561700086748c81" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "d99c2609a8e53e906b98dd4fdf4e0c31" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "19ab27129ee5a4329c92d614262339b9" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "965a5e9cf3b3dcf5aac6688a8dcbb28b" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9adaca8fa50e876723892fb3601b8b3c" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "f379aa793cb2c8da4b4f674767a64b53" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0cc2e3704a25930074bf3c4146daf587" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "ceeb7fa446b82f5519611fb75bcb1e73" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "791b43015f4f501314ab722e388b6225" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "2cde58856c9af9c8ec0106197b148850" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "98190f3a4f88dede59f598ddb91b2b94" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "8e56f2a4be6b75ccf50158f14dd76acb" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2318d5f199693812f581fac9f1b04e6b" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "b19ab81c417bff65aeee25784e74f8a2" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1346a74cb2b723f7dae8738092fc9e27" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "c4718063906715e3004929d9116bd783" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0cfd8b0f322d50ef664a345575f0b9b9" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "55510bc907509fa6c26b09d8d66e8879" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "32deb7b195fcaabff18a174d9bbed193" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "76cb6e7164301dd19bf705aff1f26e5d" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "736efad18492120448e43c776b7e9a25" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "fb3c5f9e9c7663b8d13882f22a0f2aa4" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 7168, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c287e0602a073d45d28d3d3f1dfd9dd0" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 30345216, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "bfloat16", "format": "raw", "nbytes": 14336, "byteOffset": 28037120 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 7168, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2293760, "byteOffset": 28051456 } ], "md5sum": "934be68f81995621154452adad6676fd" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 27648, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f14e9a08a484031adf3a07f050e6ff24" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 35389440, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 5120, 1728 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 35389440, "byteOffset": 0 } ], "md5sum": "7939af8318c6096f44610ae6f411f696" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "63a4219bd8f0d5088b95588bd567597a" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 48660480, "records": [ { "name": "lm_head.q_scale", "shape": [ 152064, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 48660480, "byteOffset": 0 } ], "md5sum": "f082397dddce00dec18bdcdf0448c1f2" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 28047360, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 5120, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 5120, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1638400, "byteOffset": 13107200 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 27648, 160 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8847360, "byteOffset": 14745600 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 5120, 432 ], "dtype": "bfloat16", "format": "raw", "nbytes": 4423680, "byteOffset": 23592960 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28016640 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28026880 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "bfloat16", "format": "raw", "nbytes": 10240, "byteOffset": 28037120 } ], "md5sum": "97c68ec5e6f50b02655cd803ec64874c" } ] }