{ "metadata": { "ParamSize": 325, "ParamBytes": 2149644288.0, "BitsPerParam": 4.500600961055312 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 49250304, "records": [ { "name": "lm_head.q_weight", "shape": [ 384, 32064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "15573574494a1c2a922401367f6e7f3b" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 49250304, "records": [ { "name": "transformer.embd.q_weight", "shape": [ 32064, 384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 49250304, "byteOffset": 0 } ], "md5sum": "e0811e2336da65b98ffc27b4f7a39c39" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.0.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "9e1718e1a2c9d04f784dd13251068e06" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29626368, "records": [ { "name": "lm_head.q_scale", "shape": [ 96, 32064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6156288, "byteOffset": 0 }, { "name": "transformer.embd.q_scale", "shape": [ 32064, 96 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6156288, "byteOffset": 6156288 }, { "name": "transformer.h.0.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 12312576 }, { "name": "transformer.h.0.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 12318720 }, { "name": "transformer.h.0.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 24901632 }, { "name": "transformer.h.0.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 26474496 }, { "name": "transformer.h.0.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 29620224 } ], "md5sum": "bd5a82a471ecb3709fced74240e64daa" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.0.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.0.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.0.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.0.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.1.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "2ce2ffd951c16f81e195ad82adafeaf3" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.1.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "52acd15e560771152797516cf23d4a86" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.1.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.1.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.1.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.1.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.1.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.1.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "f2201abc2c2a00bb9b5ffc47f5f2f048" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.1.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.1.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.10.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 15925248 }, { "name": "transformer.h.10.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 20643840 }, { "name": "transformer.h.2.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "903424f53f8181150a2c97782523121d" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.2.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "950e99745855d97444d74fc0999198ae" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.2.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.2.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.2.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.2.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.2.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.2.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "a1aca00fe22b6c3706713a4a3c2c32f8" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.3.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e6a9e8a2719b562f14a6388b6be3c47a" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.2.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.2.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.3.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.3.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.3.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.3.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.3.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "f2807554a7bd68d065f589925a476b3e" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.3.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.3.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.3.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.3.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.4.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "226e3ddd91a536e72e0d86aa102b1658" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.4.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7417492794a7ff51deb3fe304568624d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.4.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.4.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.4.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.4.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.4.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.4.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "d46380ecdfac1a45319de83385dc0cd1" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.5.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "7ae5de5764ca1ab91e6be02c1fc30283" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.4.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.4.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.5.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.5.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.5.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.5.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.5.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "14af8ee1cd1d05e7c08d113b775e0a27" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.5.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.5.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.5.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.5.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.6.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "fdf4ff1e395c92a73cd0e40c9ec27342" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.6.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "665d057265efabc5a14229dcab48e4df" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.6.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.6.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.6.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.6.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.6.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.6.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "0b0b6441c84f3aeb0c61cedfd90ad315" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.7.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c32e87bb5ee19319b7d5acdf94182ff5" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.6.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.6.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.7.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.7.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.7.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.7.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.7.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "1b3ed3105b2609504fa3ebe8837e0fd8" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.7.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.7.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.7.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.7.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.8.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "609f5d292a8c6132c347d9ba4259c595" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.8.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a5fcbd6db3bc8894eaa324effcc30f3e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.8.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.8.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.8.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.8.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.8.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.8.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "3075868c146f3d606875d95146083e55" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.9.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b02f95e2c945c37c47d77673c20d2a0f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.8.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.8.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.9.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.9.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.9.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.9.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.9.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "5e06fc16c0566c86a1cf77f717a32549" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.9.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.9.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.9.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.9.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.10.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "06016424bfad20babf456c7a209eee9d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.10.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ef4f3045d0158a0a6c43e003d9509e68" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.10.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.10.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.10.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.10.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.10.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 17307648 }, { "name": "transformer.h.10.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 31463424 }, { "name": "transformer.h.11.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "5928e3d92f272c8e0bfc892651acc8e6" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.11.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "586979be478c0f43f0cd5027e5d635f1" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.11.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.11.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.11.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.11.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.11.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.11.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "177818b181b1cddc040d9e98e4b152cf" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.12.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "75035088fccd2fdf9b286db8dcd695ca" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.11.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.11.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.12.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.12.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.12.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.12.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.12.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "c6436cc7b359c6921468f8c11113ad6c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.12.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.12.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.12.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.12.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.13.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "4fd8f7d49e0c9a98e7cbb1b3ed07bbb6" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.13.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "69bb615a8cc5b6b1a360873625cc0ebc" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.13.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.13.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.13.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.13.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.13.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.13.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "ed0055ab7deeb642f934b774b4873926" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.14.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fce1df62c027e82452f8cfb3de614e17" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.13.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.13.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.14.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.14.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.14.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.14.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.14.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "258279b74d92017dfd76fc9d71832afe" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.14.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.14.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.14.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.14.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.15.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "a10c286dec02cd62731f2a8f5b40948f" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.15.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f9d537894e32d025a97b1ff4e041b919" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.15.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.15.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.15.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.15.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.15.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.15.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "67186b9e5b16dfb0cc6db006541ef25d" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.16.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cd99d4c3e6595ee11923f08e4b9e5051" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.15.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.15.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.16.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.16.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.16.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.16.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.16.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "279febdba88062445152bd541cabe169" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.16.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.16.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.16.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.16.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.17.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "ad70981c0933857f5304c1440c082028" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.17.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "af3c87923ccbf1fd7e04d6a557d67c03" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.17.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.17.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.17.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.17.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.17.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.17.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "4c4985708900e1978445947ab4a47d74" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.18.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c089c839333f4b506fa63ba38a8a44b4" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.17.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.17.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.18.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.18.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.18.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.18.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.18.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "00166b2be13de0c94e54aa0f1d1e1c74" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.18.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.18.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.18.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.18.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.19.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "b50dc7481fdbff5934f303135f91dd5e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.19.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "798819c8810b29f471215475220d6402" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.19.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.19.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.19.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.19.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.19.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.19.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "13a5006b8096959aa5eb8435da420d64" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.20.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4082883dac89fe84d2d68f3057d1e2d9" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.19.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.19.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.20.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.20.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.20.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.20.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.20.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "1fa884ad427cc240c8383d3660e78a3c" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 26548224, "records": [ { "name": "transformer.h.20.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.20.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.20.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.20.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.21.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 21233664 }, { "name": "transformer.h.21.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 25952256 }, { "name": "transformer.h.21.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 26542080 } ], "md5sum": "912d990dcfe88deaa49a1c525625760d" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.21.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53ab4ed844868ad43af81f525209a670" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.21.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.21.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.21.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.21.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.21.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 17307648 }, { "name": "transformer.h.21.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 31463424 }, { "name": "transformer.h.22.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "5f40983fa7292420eddd84489281151d" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.22.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a9bad0ae1dec137c0226b6365882b88f" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.22.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.22.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.22.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.22.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.22.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.22.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "bbf5c3dc8162fe4586b1d0e998061bc4" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.23.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "53a5181919b67f6574510e290cea9d94" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.22.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.22.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.23.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.23.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.23.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.23.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.23.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "602ce327040a2db0ff74d71f5ef5e8e2" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.23.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.23.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.23.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.23.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.24.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "aeb7de38e599f30296ab132386276ab6" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.24.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "22c564e1797995d74b43a54da608867b" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.24.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.24.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.24.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.24.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.24.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.24.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "6ab2be978356555983dfc998efb6b627" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.25.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a1e621ca08abc4d817808a0b09abb5d1" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.24.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.24.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.25.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.25.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.25.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.25.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.25.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "57c3909558ea9b3dfc48dd6840fe6f0c" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.25.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.25.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.25.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.25.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.26.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "813794b63ae1b1c5f4966bd327faec1e" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.26.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "761284c9c3a22c720eb86e459d898196" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.26.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.26.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.26.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.26.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.26.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.26.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "7647aea0f4f12c6a640ae55c72c7dd0c" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.27.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8c2d523214585706470f8ce2e5c38147" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.26.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.26.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.27.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.27.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.27.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.27.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.27.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "09c3e0ab1d9f1069a1747a86202eb39b" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.27.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.27.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.27.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.27.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.28.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "0b0383b27c5235b4c4b2b7b8be0d951f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.28.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2a9d1bb3c1e46ec0a53dcdff1beb16bf" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.28.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.28.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.28.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.28.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.28.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.28.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "319ce038328f53acac2541a93e11d23c" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.29.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "47b56c5e0b7f7348b9da30515e2002f3" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.28.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.28.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.29.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.29.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.29.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.29.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.29.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "73837009f3d9972f22542d1a4e6f3006" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.29.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.29.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.29.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.29.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.h.30.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "3882c3ee62684d5a1f1742da3b0126b8" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.30.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "954ff7887fde49db4eb85753a15ee426" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 22616064, "records": [ { "name": "transformer.h.30.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "transformer.h.30.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 12582912 }, { "name": "transformer.h.30.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 14155776 }, { "name": "transformer.h.30.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 17301504 }, { "name": "transformer.h.30.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 17307648 }, { "name": "transformer.h.30.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 22026240 } ], "md5sum": "53175c84e1a0ec3d27e3922c65824386" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "transformer.h.31.mlp.gate_up_proj.q_weight", "shape": [ 384, 16384 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b473059eeb4d6e99e788b00135e9f5b2" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 33239040, "records": [ { "name": "transformer.h.30.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 0 }, { "name": "transformer.h.30.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 14155776 }, { "name": "transformer.h.31.ln.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 15925248 }, { "name": "transformer.h.31.mlp.down_proj.q_weight", "shape": [ 1024, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 15931392 }, { "name": "transformer.h.31.mlp.down_proj.q_scale", "shape": [ 256, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1572864, "byteOffset": 28514304 }, { "name": "transformer.h.31.mlp.gate_up_proj.q_scale", "shape": [ 96, 16384 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 3145728, "byteOffset": 30087168 }, { "name": "transformer.h.31.post_attention_layernorm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 33232896 } ], "md5sum": "97a0d6248081f611e993c117e0afbe0a" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 21239808, "records": [ { "name": "transformer.h.31.mixer.out_proj.q_weight", "shape": [ 384, 3072 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 0 }, { "name": "transformer.h.31.mixer.out_proj.q_scale", "shape": [ 96, 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 4718592 }, { "name": "transformer.h.31.mixer.qkv_proj.q_weight", "shape": [ 384, 9216 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 14155776, "byteOffset": 5308416 }, { "name": "transformer.h.31.mixer.qkv_proj.q_scale", "shape": [ 96, 9216 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1769472, "byteOffset": 19464192 }, { "name": "transformer.norm.weight", "shape": [ 3072 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 6144, "byteOffset": 21233664 } ], "md5sum": "42635a8a732902859728eea1e913d670" } ] }