{ "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv", "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "linear_info": { "model.layers.0.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2023 }, "model.layers.0.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1124 }, "model.layers.0.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1139 }, "model.layers.0.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 94 }, "model.layers.0.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 538 }, "model.layers.0.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 83 }, "model.layers.0.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1063 }, "model.layers.1.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1644 }, "model.layers.1.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1288 }, "model.layers.1.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1319 }, "model.layers.1.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 207 }, "model.layers.1.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 945 }, "model.layers.1.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 211 }, "model.layers.1.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 762 }, "model.layers.10.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 193 }, "model.layers.10.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 162 }, "model.layers.10.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 196 }, "model.layers.10.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 94 }, "model.layers.10.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 32 }, "model.layers.10.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 113 }, "model.layers.10.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 99 }, "model.layers.11.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 273 }, "model.layers.11.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 219 }, "model.layers.11.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 268 }, "model.layers.11.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 54 }, "model.layers.11.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 34 }, "model.layers.11.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 62 }, "model.layers.11.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 133 }, "model.layers.12.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 46 }, "model.layers.12.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 84 }, "model.layers.12.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 66 }, "model.layers.12.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 100 }, "model.layers.12.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 63 }, "model.layers.12.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 115 }, "model.layers.12.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 156 }, "model.layers.13.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 270 }, "model.layers.13.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 247 }, "model.layers.13.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 278 }, "model.layers.13.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 40 }, "model.layers.13.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.13.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 42 }, "model.layers.13.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 55 }, "model.layers.14.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 678 }, "model.layers.14.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 467 }, "model.layers.14.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 613 }, "model.layers.14.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 271 }, "model.layers.14.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 702 }, "model.layers.14.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 265 }, "model.layers.14.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 682 }, "model.layers.15.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 996 }, "model.layers.15.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 665 }, "model.layers.15.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 826 }, "model.layers.15.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 126 }, "model.layers.15.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 359 }, "model.layers.15.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 152 }, "model.layers.15.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 455 }, "model.layers.16.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1673 }, "model.layers.16.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 978 }, "model.layers.16.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1109 }, "model.layers.16.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 128 }, "model.layers.16.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 504 }, "model.layers.16.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 159 }, "model.layers.16.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 469 }, "model.layers.17.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1545 }, "model.layers.17.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 905 }, "model.layers.17.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1087 }, "model.layers.17.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 262 }, "model.layers.17.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1038 }, "model.layers.17.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 327 }, "model.layers.17.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 785 }, "model.layers.18.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1414 }, "model.layers.18.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 851 }, "model.layers.18.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 984 }, "model.layers.18.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 298 }, "model.layers.18.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1130 }, "model.layers.18.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 285 }, "model.layers.18.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 857 }, "model.layers.19.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1348 }, "model.layers.19.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 819 }, "model.layers.19.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 999 }, "model.layers.19.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 160 }, "model.layers.19.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 648 }, "model.layers.19.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 151 }, "model.layers.19.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 497 }, "model.layers.2.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1450 }, "model.layers.2.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1014 }, "model.layers.2.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1167 }, "model.layers.2.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 257 }, "model.layers.2.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1176 }, "model.layers.2.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 205 }, "model.layers.2.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 862 }, "model.layers.20.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1412 }, "model.layers.20.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 844 }, "model.layers.20.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 965 }, "model.layers.20.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 151 }, "model.layers.20.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 685 }, "model.layers.20.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 166 }, "model.layers.20.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 587 }, "model.layers.21.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1355 }, "model.layers.21.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 833 }, "model.layers.21.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 908 }, "model.layers.21.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 137 }, "model.layers.21.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 472 }, "model.layers.21.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 124 }, "model.layers.21.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 393 }, "model.layers.22.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1113 }, "model.layers.22.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 667 }, "model.layers.22.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 737 }, "model.layers.22.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 92 }, "model.layers.22.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 130 }, "model.layers.22.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 89 }, "model.layers.22.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 150 }, "model.layers.23.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 856 }, "model.layers.23.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 478 }, "model.layers.23.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 586 }, "model.layers.23.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 168 }, "model.layers.23.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 516 }, "model.layers.23.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 144 }, "model.layers.23.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 356 }, "model.layers.24.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 435 }, "model.layers.24.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 242 }, "model.layers.24.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 319 }, "model.layers.24.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 96 }, "model.layers.24.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 440 }, "model.layers.24.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 93 }, "model.layers.24.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 297 }, "model.layers.25.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 147 }, "model.layers.25.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 121 }, "model.layers.25.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 122 }, "model.layers.25.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 11 }, "model.layers.25.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 7 }, "model.layers.25.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 14 }, "model.layers.25.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 13 }, "model.layers.26.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 17 }, "model.layers.26.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 36 }, "model.layers.26.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 20 }, "model.layers.26.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 2 }, "model.layers.26.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.26.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 4 }, "model.layers.26.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.27.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1 }, "model.layers.27.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 15 }, "model.layers.27.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 6 }, "model.layers.27.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 5 }, "model.layers.27.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.27.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 4 }, "model.layers.27.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 8 }, "model.layers.28.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 304 }, "model.layers.28.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 248 }, "model.layers.28.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 213 }, "model.layers.28.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 65 }, "model.layers.28.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 174 }, "model.layers.28.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 80 }, "model.layers.28.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 238 }, "model.layers.29.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 436 }, "model.layers.29.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 430 }, "model.layers.29.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 334 }, "model.layers.29.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.29.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1 }, "model.layers.29.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 2 }, "model.layers.29.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 2 }, "model.layers.3.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1216 }, "model.layers.3.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1042 }, "model.layers.3.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1217 }, "model.layers.3.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 228 }, "model.layers.3.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 627 }, "model.layers.3.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 203 }, "model.layers.3.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 574 }, "model.layers.30.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 973 }, "model.layers.30.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 747 }, "model.layers.30.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 683 }, "model.layers.30.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 14 }, "model.layers.30.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 10 }, "model.layers.30.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 14 }, "model.layers.30.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 22 }, "model.layers.31.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2035 }, "model.layers.31.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1270 }, "model.layers.31.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1335 }, "model.layers.31.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 59 }, "model.layers.31.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 214 }, "model.layers.31.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 60 }, "model.layers.31.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 284 }, "model.layers.4.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 960 }, "model.layers.4.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 730 }, "model.layers.4.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 962 }, "model.layers.4.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 271 }, "model.layers.4.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 966 }, "model.layers.4.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 266 }, "model.layers.4.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 699 }, "model.layers.5.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 906 }, "model.layers.5.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 654 }, "model.layers.5.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1019 }, "model.layers.5.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 243 }, "model.layers.5.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 503 }, "model.layers.5.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 233 }, "model.layers.5.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 470 }, "model.layers.6.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 740 }, "model.layers.6.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 535 }, "model.layers.6.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 826 }, "model.layers.6.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 248 }, "model.layers.6.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 456 }, "model.layers.6.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 237 }, "model.layers.6.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 370 }, "model.layers.7.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 437 }, "model.layers.7.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 317 }, "model.layers.7.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 494 }, "model.layers.7.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 244 }, "model.layers.7.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 625 }, "model.layers.7.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 251 }, "model.layers.7.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 545 }, "model.layers.8.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 201 }, "model.layers.8.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 164 }, "model.layers.8.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 203 }, "model.layers.8.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 167 }, "model.layers.8.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 195 }, "model.layers.8.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 149 }, "model.layers.8.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 287 }, "model.layers.9.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 93 }, "model.layers.9.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 118 }, "model.layers.9.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 89 }, "model.layers.9.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 212 }, "model.layers.9.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 284 }, "model.layers.9.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 199 }, "model.layers.9.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 235 } }, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.43.2", "use_cache": true, "vocab_size": 32000 }