{ "metadata": { "ParamSize": 325, "ParamBytes": 4083949568.0, "BitsPerParam": 4.068559606592764 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "90455a89d1f126d08436b3f9763bfd45" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32833536, "byteOffset": 0 } ], "md5sum": "e6b4a4d9bea78df69d0088aa994566eb" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9b778597ab5ca87dc01e04221c45cb5b" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e8e1bf18bec705ae1e772b4730919929" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "fa6f164217ade3bb34c156c4f286ab9c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2c6936ad435ad1c6b2ab47317331f196" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3cbda99e7efd5da49968b3c299407eee" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "e3f093b5b387544fcd0b3c7b3e6b6e04" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b9d3c360a92dc0284f8ba7d37ef19d8b" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "42c0f06465b4d6e32469cca8acff7b25" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "b1a8ac60c0737659a44c189f115950cf" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fd3581a9e811c8dc1f233e304b4a13b3" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5ea15f4743ff8e15b00df1d3dfde33a8" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "51e49874df52cca7a4ba61e800b151ce" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1f438e2afbf2aa85acc612075229ece2" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d225be884b5de1ca07c9fdfdaa1b72fa" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "ab1a7cb113084a2bfe82609ec0f270f0" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a9d1501cdb20522d8578d762010a9e66" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a535989520729fe1ad108283412617c" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "56d48fa047896109a0843b116fd56699" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8e65c82daf533f41d1178a896edbc0ab" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3756430dec29dcf5749119711e7cde01" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "79d548683788092e7d319bba15560945" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f26b83e431294734057c2db0763c85ac" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2cced24fdc07387133855b2a83ef9216" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "9d11449dff963ae5177bb3133fa296a7" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a8f516e7e20a894f60295b4c5b9a4333" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "58333f98f65816979f05aa195e553680" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "3d3102e49dca6b029257844d01fc5e9e" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "ddaa1c46e87cce229d39e9c1d32c09ba" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ceb6060ba03935f48d9cf64f5615cc7f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "ecaa5bf55729eb0234746f0ca6874f57" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "129f390132725a18ce5e8657be5f967b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "edae9498b651722087226b37f3a9aa73" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "e0617218474b6197a73dca78fd532b4f" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "46f9b29351334ec7067cf3439a14d3b7" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "79ebbd1a6cbc5773150c36ac23d1772c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "8e4bcbb077f78ea3f6b767a517e66719" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "879e51c4c5239b702d24a326cb464842" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "1b827c0ecc6a5767977f99e39d9c71a7" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "2b9439ab59b80669dc37fc84c840d856" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c139c737a002ca943d9dd04d968e0433" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "2b3704b19fd8a5d3ef619b263984594d" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "b3b3cb890e090d651989c819a6623552" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "03eb592d6b98a4b8c412110c43785637" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "45524aef6c0af6d870b48e877b68026c" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "28653197dfa7031f95d4b0080194fc85" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "16c6de779331e7eaf86b3cb8c618138c" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "9efd65a7764ffbb32af9513533883a8e" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "ce2e038c019ccf3a617b095878179a35" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "76b2c7179bf554546b5856870c6562a4" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "419ccb3238310e2c1992323ef6d718a0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "3dcf7222ed7dfe4a544e64df5af9c89b" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5b3d755084a5092b8d530d13bab57f2e" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a995bfca9163b9e9c7f15b3fc0ce76f7" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "a7917ff1b8e96d376ac45a7eabe663b0" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d5df9afe508df9e79349479bf213579d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "49955828a76f4de79a3e726e4009544e" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "5c94d5510d687646d5203149894ee6ef" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "764e9984c4138bbadb1c8d04e4bb6dc2" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3d467cb74d79163ce0814cab942e616b" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "04ea55217a41c9b86ca838a255ff2e99" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "dc792cb72cdd3b0011cba2a0fcd481e6" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "019cba3ae7e301958093d326420a5733" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "4535d011c332afec1cce45b81a7b9a3b" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fc22e0cbefe67293eda818abaad18ccb" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "944637e4ab87497197dd5281cbf45946" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "a3eb9dc9a7fa1e1eb2051c726c067294" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "304476e20392ae34bb400246b52f76e0" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0c36d92068dd2d6e5a19c5fc2bd58a53" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "a865e007db83e6c0445a5759002d13d7" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8bca1047c80d899cf56a354f902c1749" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "03a2f1bea6d99dae9d63212f312d4ee6" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "ce9afcd235e30ca37998c4c9cf1e2495" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "74bbc3c83436d6221751022e64de7bbb" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6a45b1511a7d226f881fa9f7719860b9" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "18c4ac71cff5ba0bcb5366277594ddc7" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "90de58cdb3d75ffb100d660740160c9d" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "465e2a5ab8372fac9d9944063e73dd65" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "0bf40d9d5464f14a685b1ed73e7b8f60" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "1adc3bde7e5441ae2b59a861ebd2e960" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "bde1e9a40dc8321dc28792d8a3d35585" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "f6997bdd67d2f53290979c1da5b523bf" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "76822e192a5f9c866542294df1600e0b" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "faf0b05547e7c32ff1a7cf03b5776c89" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "a45054b9e2efd9de6b862bf96a2a4960" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7a00512156ce6043af21422e4ad1409b" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0f3a7ade09af0b45bed61a82dfd024c1" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "24dc66cd9d9597f05d4849c97839dd30" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "07672ad529dd72b8a79faef3f6bc36b8" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "4d46cf56bb86b01c0ea48c13d4bff904" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "8853c2a39243f41eda78a7e083b1a240" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "2f10dedfd2699926d35e9abe35decc96" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5f542b642be02c5549fa56e01fb9806a" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 21073920, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 } ], "md5sum": "169a901be44149d823b232c80c29af13" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 4096, 14336 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a11fafedf9e099beb3dc30a4e63f6d5c" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 14336, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "46d455edf96a732c75ea10d3468f5bca" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "6319b5906b36a200a67e2a99678185e0" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 32833536, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 32833536, "byteOffset": 0 } ], "md5sum": "51becee22d4a53fb23483900cf2507ae" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 21082112, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 4096, 3072 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 1, 6144 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 2048 ], "dtype": "int8", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12595200 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 20983808 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 1, 28672 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 57344, "byteOffset": 20992000 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 1, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21049344 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21057536 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21065728 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21073920 } ], "md5sum": "33788ae8cdaa0e8cde1145afef291ecb" } ] }