{ "metadata": { "ParamSize": 405, "ParamBytes": 6889985280.0, "BitsPerParam": 3.7492532385781288 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 335549440, "records": [ { "name": "lm_head.q_weight", "shape": [ 131074, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335549440, "byteOffset": 0 } ], "md5sum": "db78291eb014af038fa07a2e91aa8360" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 41943680, "records": [ { "name": "lm_head.q_scale", "shape": [ 131074, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943680, "byteOffset": 0 } ], "md5sum": "4999c2b98bc2f7b8ee40c83ad7fcef75" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d73cb9559f82814e6b510145c242a80f" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a4c9864927a98dde955d12159be007fa" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f7d97c6760c6d833117b3b510acbc148" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bc54b4f002db3591a61b81d6f6a54a19" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 27566080, "records": [ { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 0 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 10240 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4597760 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13772800 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13783040 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 13793280 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 18380800 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 27555840 } ], "md5sum": "b8a04d0a01feb02789e3283da8936720" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "6b9db6614a617fd546e8e905a5f105d1" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.34.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "866200867cbb5d10a979fd7892b9999c" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4f79f7983d201a8dce15e15fdf7916a5" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.35.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.35.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "5e74bd880519d659d057d77a56ff24c8" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "412dda60c801471a780f399a988b2919" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "476bbcbb54097233de7d2cb489d15794" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "5eec38a3c77985c4c0cd66804e830262" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4d01d1bf52783c988e87f550abbcc090" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.36.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "213e0e474a82b47304ef5fbb2b767503" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4d40f445a0f8e6dc4ada73a32cb0f2ab" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.37.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.37.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "79c5d46c91a39b84cb32c8ba0ef9502d" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "b1cd902bd4d8b08067ad268a857a787a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "19df61783893853769e8abd96b3d4c62" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "9fc0809894ddc62c4e2f00c894aa76da" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4eefbfd4eb89612ae6b075c2a683e916" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.38.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "890c182c222cdb69b51fc4dbfd76806e" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "aea5912b31c462f6d8fe95f93520e518" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.39.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.39.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "bbba77ca7489da35a5bacd17fc679c89" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 335549440, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 131074, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 335549440, "byteOffset": 0 } ], "md5sum": "b62670bd3b84682906c682dadb19d3f4" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 41943680, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 131074, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 41943680, "byteOffset": 0 } ], "md5sum": "aa5d5ada9a33f42e6d53ed8232aa9192" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "96da8323a71c73fbcbd0a4b1900450e7" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "2fd9a9d056911c87ea7a6630c4fac484" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 25589760, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11806720 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11816960 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16404480 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25579520 } ], "md5sum": "10f103e28206fd78ba4a65762e60ced0" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0c79bd833fadae82a6df52554a429d26" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d248082a0524976cc036616f24e8982f" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "924d7ef38b960fdde12db9393a0eae53" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c796d3da5d128dffbf37c37338d0862d" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "43c9fa0e76a20d770bb344fb88ee95f1" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "4a124b6ca399988e21431f436d29c9b2" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "e5a7e927c8533f61424753d38025186a" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2237c490f88c0dcd88a4a0067f7ae3b0" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "34da957f897e047a8d990d67ca31e1d1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "27f24057ae53038471f8b7309f74e5f3" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "cc8c4387e18bb91b9c92a350506332dd" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "669d1868519cc33705c2d2e6e5a5a7fd" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "8239db8c5f6951954711181525692c96" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "e70c7dbd8bd5162ff30c99aa52f3e077" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "01948792246722a49f47feceaa9c5636" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "a7936bd3e076b6c4e4f45afa3f8870ca" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6b6c9bade904c34ade5754e372c91b59" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "e233e1f6977bdedd0a12a7b54843fc3b" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3190f577e640692ca7d3708c7c81b533" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "2c448e9b36dcdd7a017fbf432f9b74fa" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2d7875051a2e750fecba991c5bc28706" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "a0946d89fe17fcf0734cbdcb724bd122" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6214c25ca3714ec38f78a90f8d08befe" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "57e22e1e7b96b08a6d1e0da99baefff9" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "4decb383460f1b3fb4a62cdaa7b6d1c0" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "dcb380572ffc08fdd3ac598a17c4ea88" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "b94c834cd43a91f58fbd76da38ce6486" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5f1a22890c9a32b3d8575ae7cd9243e5" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "9e4a3b66c83bb3b5124b5b1fd2c4a9ae" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "899899edfe2ac567990a0de50b0a211d" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "9e5ca5bdd1a920b21bde7fe848dff646" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "8b61a572048a0bdc960b3a72b06d3dd1" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "6fac9a5fc7ff1cabc352ed6c7a2bfe8a" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "3a91a62efc912bb245c7c28d64ab1845" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ca3922024c4530043bb85b26b53967ec" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "cab94525b2af2c976afd8b6e2f319456" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "0e6af51998a14bd9414a88a34a17969a" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "833387887b11fe5be9e00d6495adf853" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "ccd7c24822fca0fc726fee7066929d84" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "3b0668126460e207e605f7a823dd29cf" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "ded7d584f6dfffdf9f34d443b63e4094" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "fe04781cf875485146012d0006934030" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "cdf843a4f64cac5b8ae665330875caa3" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "3fce6b5b8ab72e6cfa4f4c46ea94c93b" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "68e816021d1a7cc5df6067b98441b3b7" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "8d2fd541f73b751725d3adffa3bedc72" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d677893053778aa485501b7c2b90caba" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "da122eb90ce119f7a3e0623e3e5a99c5" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "52f7117acaba61d28c0aa7015f4aeeaf" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "dca5928c223e7daddd2e5896b2629020" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "06c197b1ed06bd686b7b9d98dfaedf9c" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "180a3b9d3711ab812a9163618c920b29" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "784308935f52d5bf78a621f25a7ca7c9" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "37980a310c48c6bad8b578752974be11" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "56d7a76231c1682666ce2be10d4329f6" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "9c348534e88a976c7a28592591d00e53" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "10aa43b54c796a2f33b4ad428da2f7c1" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "14fe297d6b85e532b45edd49ef252dc2" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "d95dac167f0fd4894d35fd6e0d7ad7ea" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "da70170bff4b2639b20715c8fdaeb8aa" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "677869b6f65d7b2184e76cd774c8f792" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "8e8872dd06ddb8744ecb0a696b9585fb" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "25f2401c02c8a597ddd67050741b5338" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "d9f29e0232e1f0fbb0f6fd0d3f5fc6de" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "25050a28ff0adafb819015f45405e976" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "8d7bb5827b0d4c19af1dd5166485cc87" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "43257dba1c834560c25625083d53843c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c6570d27efec8b6cdf556c679447b50c" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "3e1e483f0ba84b92ab6151be8f88815b" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "a022c03186daa5ba50875f05801a7704" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "9e1f76cc8fece17fb46bcb5d86081da7" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "0ff2e1f5c81e93d47cf6787f8fc19358" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "961654e379c52ea61a7f0f8ffadc4e51" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "880886b4f5411028a35ce5ac130562f6" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "9298955b01f51dc939f0740f078d5c48" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "a0f3d76db3acd0d68f15e1fd24f06e91" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "bc112df594f1d53628b65be08a4b2c12" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "abfd9e6b0f4295a6999c02390f45473d" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "e91b6e480ed5723f132231a5ad8b794f" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "557b6452c9ce76c797f21d1f96fc4934" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f7e45dd9cc34646825cba8c9ff60213e" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "f0e89fcc3c265f5a059d3ad3548712e7" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "d8dde43508eb32e5fd97579b4dd4fe2f" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 20971520, "records": [ { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 11796480 } ], "md5sum": "30ad0150f469bf1e8461058d124ec475" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "96a7dd31fa9fe766579475422630e408" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "264ab675ae47c1f380c338e7cac3186a" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5b7794d6d02454706e760b3a804e53f7" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "5871645108c7b28a4fa698c00df0e620" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 18380800, "records": [ { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4587520 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4597760 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 4608000 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 9195520 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18370560 } ], "md5sum": "34c28a2a166289aefc514bf64d4a4252" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "f0ba3e41ed9f36beafed1f3df9c5392b" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "2caefdc9604bd6b7273e11a3653f77b3" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "84b3f7ea1013316dce982899df822510" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "059d323e1fac57f89c2bb69376e01033" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "62fe79c73990ff1cd74204728d4afcca" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "16e5e55d6e0747adef61868cd992c62d" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "ec20f0e7839ee55bbd1c55fcc87923f0" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "5606a1344f8f29b44a569ef1d112aa09" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "4ce87e1c991141005af1fe423cc3fad1" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "532d9602e597384a3e3e507f580711f3" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "5a58218b55509ebca9514c4abaaa3905" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2bfb71b3fea978c4a02d7666f5184dc8" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "fb7260ae46cf967cffb43765f87d16b6" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "294fa807d1043cce85e4a5883c101b0b" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "2bc670e1dc616be1fe31b021d61c2bcb" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "7b32958d57893e694a1ba707173564bb" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "f12e706e40e522a916ec5e12a79e373a" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.30.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.30.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "c4000583f301a730e677f0e60c8c6fe1" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "d3366c859ca3f5eb1e3e50c38120530f" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "84bc72b02dede30af952b5db90c24575" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 25579520, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 11796480 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 11806720 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 16394240 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 25569280 } ], "md5sum": "77d8070186a21fe6e8150765a9ae7e30" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 36700160, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 5120, 1792 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 36700160, "byteOffset": 0 } ], "md5sum": "80c1f8b410ed7c19525c68b14b2aab16" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 29501440, "records": [ { "name": "model.layers.31.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 15728640 }, { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 17694720 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 28180480 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 29491200 } ], "md5sum": "4546fa630ae161482194c93b4999130b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 73400320, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 28672, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 73400320, "byteOffset": 0 } ], "md5sum": "81864ddbf9ffe36f701a6be14945b3a7" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 31467520, "records": [ { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 5120, 448 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4587520, "byteOffset": 0 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 28672, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 9175040, "byteOffset": 4587520 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13762560 }, { "name": "model.layers.32.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 13772800 }, { "name": "model.layers.32.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 29501440 } ], "md5sum": "d4823f8d736c710e4ee4c880c7459388" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 29491200, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 }, { "name": "model.layers.33.self_attn.qkv_proj.q_weight", "shape": [ 6144, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 15728640, "byteOffset": 11796480 }, { "name": "model.layers.33.self_attn.qkv_proj.q_scale", "shape": [ 6144, 160 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1966080, "byteOffset": 27525120 } ], "md5sum": "13baafc1d18c270f0c95671e53e90648" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 11796480, "records": [ { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 5120, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 5120, 128 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1310720, "byteOffset": 10485760 } ], "md5sum": "9434e998ed3037fbace2915566aa77b1" } ] }