{ "metadata": { "ParamSize": 325, "ParamBytes": 4140998656.0, "BitsPerParam": 4.070120983102826 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "lm_head.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "322e9cb9ad2f221fc0457007eb472e25" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cc5224947ef910187fdd00af79d00b34" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3ebe21dd1b0613ac8c7d45945881f18e" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 262668288, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 128256, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 262668288, "byteOffset": 0 } ], "md5sum": "746988d55bddfb4042023efdd48a2b8a" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e3922518d1b5aa74de9daff9745743ce" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "56fd863a49bbeb63a354fa7c8b09060f" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21962752, "records": [ { "name": "lm_head.q_scale", "shape": [ 128256, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 0 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8208384 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 8216576 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 9134080 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10969088 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 10977280 }, { "name": "model.embed_tokens.q_scale", "shape": [ 128256, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8208384, "byteOffset": 10985472 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 19193856 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 19202048 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 20119552 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21954560 } ], "md5sum": "1e91be3a1a42e388a365906a92ed92f5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "66644d60c82adb1c76ba4ba6af931756" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "a6d4759a48803975f9e294a2be3c0b05" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "944c8ac0cd3442bb4f3a009fcbf1ad71" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "aef1345c072ceeef11d9908108b194dc" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "7d2d78390f65905b1ad4aba8605fb0f6" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "13f11c39d0419c62c9c550adcdc23c28" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e3ebaf4e77e94034032007489e05e9eb" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c8adabf4f94029899e64953d8153d4e8" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "7694fe9b9969b686dc9a0958830df1c5" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "805861b546341e277b984dcd349b222a" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "8c8c0cdfdae4b78dfae5ac301401a518" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "64970f5ba38655edd1a34a2d59ee8b86" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "70b7d0409e79963e85c6c4d00ebfa544" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d3eca0924eb1d682a962f4f4e0780790" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "4cce2cbb360c48908c89afe6d5521b88" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "3421d9246e1a3ef369ce4d4e788a953b" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "376571f8747badcfd2e24475bc75f5d1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "cdb63ebda58e6f8a5329762357d33e49" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "a87f91d706bb35397f6df9151a2f798a" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "52923c082fa5b882f2c37cc7645c5e4a" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "be7537b85bef222f9b76c45994c60ebd" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "db809ad13edd81b6ab397a8f181667fa" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cd4e578961f50e162b666f7aa6901140" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 24379392, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21626880 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22544384 } ], "md5sum": "d9c8d0e17122841b8adb6b9c32941950" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5de4095e978a1841d7427f632f259771" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "cdbd8048d2e0c8ca892c2f762e07005b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "a116274a092073b6080def33be36cef5" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "60886d7e318b0b89dae9f89bbd78f054" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6e7b7fc4c1e9cc4ec8dc134e15c79db8" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "5d31ce2688ba6caa487509aa467581ff" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "dfc16503b9fa13b770cf014dda8cbcdf" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "30768609886b8de4864d3b09eefa89cd" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "e12106253f5f470fabd359bb282a9996" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "12eedb3807b11f18a7eac0f216575b9a" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "fedbb79ecca543d1dfc75ea37fbcf8ab" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "97271e667a46bba8952702873833029f" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "77e9776b85490b44a05693bad8ddfc1d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "6ab31c1f7aa9145aeb3649a06825ef15" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "de7e3d2b17b3ed9c125e422d245b6947" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "66fd5c0fe7ff6e027d5d487f9c522c18" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "f2e11dc64ee553371eec5bee4f23489b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "bc8de0e0330842444a07bcc10772986f" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "5f2fc1d6efba7540ad72c1ccff1999ad" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "c1c9c25c834f66721cf211750c70f0f8" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "b485d330bad2386b9e6b0f2070c7fb30" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "f052a32d9f2ca21bc60a600f646666bd" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "aa8867c795bbb82258b4cba1b19b41c4" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "c6da1b93911eae094216010f53b692fc" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "339b5714d7d97552b90c97a540bebecc" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9e1662df5ed024f0a2cac3f2ccdb687e" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "c9b24983b6f46da263c9fe68bc76b2f7" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d8273bba73327764bee207186b916989" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "74805d242b98d191c14e9b0ce528d005" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "a86d4b83341df07f7a15b4ed1190a9a7" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "6fd80347c4ce853e42a69ac862955e0b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "b418e64e7a319b83f0facd051b3480df" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 24412160, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21635072 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21643264 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21651456 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22568960 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24403968 } ], "md5sum": "7e602ae58ce54fe82eb44c6c06809524" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "ac0f3ef2d59b834307100c2ce62f6aa5" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "0a72ec7f4824f760f4f93267aa9ac4d2" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "eca71404edc106eca71702fcdb1846a2" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d2b14a23c6a99dcd1135a6afa6635673" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "83d56926a5418a5b0912c6e339836f8a" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "7191970af97b07a651766561d4d35325" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "8505105f28517b1aa7e7c6b44ed1fb4a" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "d03599fbdb83879fd6df34b153856cfe" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "780df52b01c27de76f0e4146bd37dbac" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "b90ab5e3c8d1986a945cb4c6c2f9b6f5" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "11fd12e95dd08958e4d2583b75e534dd" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "91f8958a028df26068d43cad77c3bb44" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "e0ab1182f7aa306abc6d1796224fb7b3" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "bb85f061c5de412f1946102cb8849491" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "c6ce5b2cf1e96fbf35edc49145f1985e" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "26b29eb357d551f9e2ed0ee065af7a46" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "88f1f09e6207129a4a469f662e02f4ee" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "2d3da3eb913c22d1a8de686b8b29b408" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "d5768484d5737ea82836edf0a98bef9b" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "3217e0f511197f59dfb52b0c5e68a03c" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "d9358061859fe2eeaa4cb38b7f651588" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "48a914ef1e623fa520b38c5a813a9a64" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "5a2f3db32f46a0207cc5afcb6767e7b4" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "075a7734bdeb89afeb5122c6016f215b" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "0c1c4ffa61b435e1f268fc8a8c1aa102" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "edffa4961715461484d625c5e38b4df1" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "db84684302cd81ed79ea0e05650cf6ef" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cab9f31f5d7b606c77219887244e0c27" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "9019b14260c5d6a1874a00c10c295f0d" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "b1009edff42594d254046daa5ba256aa" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 4096, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 29360128, "byteOffset": 0 } ], "md5sum": "cf4152ad212bbb19fe922845c548d782" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 58720256, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 58720256, "byteOffset": 0 } ], "md5sum": "893b18120c579067e004fc357a70724d" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 24395776, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 21626880 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 4096, 112 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 917504, "byteOffset": 21635072 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1835008, "byteOffset": 22552576 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24387584 } ], "md5sum": "e97011637ce8edb9e413ac1f0962bdd4" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "af9b0f4314dfb1903eb42aa1c76e5639" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 21626880, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 393216, "byteOffset": 12582912 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 12976128 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 4096, 32 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144, "byteOffset": 21364736 } ], "md5sum": "cccdd8aa07da8c626788a96ebf58e445" } ] }