{ "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv", "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "linear_info": { "model.layers.0.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.0.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1368 }, "model.layers.0.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1372 }, "model.layers.0.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 239 }, "model.layers.0.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 794 }, "model.layers.0.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 202 }, "model.layers.0.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1213 }, "model.layers.1.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.1.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1883 }, "model.layers.1.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1946 }, "model.layers.1.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 459 }, "model.layers.1.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1356 }, "model.layers.1.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 448 }, "model.layers.1.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1358 }, "model.layers.10.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.10.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1999 }, "model.layers.10.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.10.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 845 }, "model.layers.10.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.10.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 882 }, "model.layers.10.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.11.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.11.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2079 }, "model.layers.11.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.11.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 760 }, "model.layers.11.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.11.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 778 }, "model.layers.11.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.12.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.12.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2056 }, "model.layers.12.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.12.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 832 }, "model.layers.12.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.12.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 909 }, "model.layers.12.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.13.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.13.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2101 }, "model.layers.13.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.13.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 743 }, "model.layers.13.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1397 }, "model.layers.13.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 836 }, "model.layers.13.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.14.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.14.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2190 }, "model.layers.14.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.14.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 877 }, "model.layers.14.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.14.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 933 }, "model.layers.14.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.15.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.15.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2240 }, "model.layers.15.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.15.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 780 }, "model.layers.15.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.15.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 887 }, "model.layers.15.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.16.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.16.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.16.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.16.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 858 }, "model.layers.16.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.16.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 885 }, "model.layers.16.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.17.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.17.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2261 }, "model.layers.17.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.17.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 835 }, "model.layers.17.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.17.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 913 }, "model.layers.17.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.18.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.18.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2292 }, "model.layers.18.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.18.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 743 }, "model.layers.18.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.18.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 775 }, "model.layers.18.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.19.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.19.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2283 }, "model.layers.19.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.19.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 682 }, "model.layers.19.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1515 }, "model.layers.19.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 758 }, "model.layers.19.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1468 }, "model.layers.2.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.2.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2008 }, "model.layers.2.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.2.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 658 }, "model.layers.2.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.2.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 630 }, "model.layers.2.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.20.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.20.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2285 }, "model.layers.20.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.20.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 762 }, "model.layers.20.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1472 }, "model.layers.20.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 804 }, "model.layers.20.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1495 }, "model.layers.21.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.21.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2287 }, "model.layers.21.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.21.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 586 }, "model.layers.21.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1502 }, "model.layers.21.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 609 }, "model.layers.21.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1391 }, "model.layers.22.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.22.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2306 }, "model.layers.22.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.22.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 541 }, "model.layers.22.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1008 }, "model.layers.22.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 545 }, "model.layers.22.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1084 }, "model.layers.23.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.23.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2316 }, "model.layers.23.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.23.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 575 }, "model.layers.23.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1220 }, "model.layers.23.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 519 }, "model.layers.23.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1169 }, "model.layers.24.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.24.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2356 }, "model.layers.24.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.24.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 442 }, "model.layers.24.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1067 }, "model.layers.24.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 470 }, "model.layers.24.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1116 }, "model.layers.25.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.25.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.25.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.25.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 485 }, "model.layers.25.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1042 }, "model.layers.25.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 533 }, "model.layers.25.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1179 }, "model.layers.26.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.26.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.26.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.26.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 481 }, "model.layers.26.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1006 }, "model.layers.26.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 572 }, "model.layers.26.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1154 }, "model.layers.27.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.27.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.27.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.27.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 370 }, "model.layers.27.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 790 }, "model.layers.27.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 414 }, "model.layers.27.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1019 }, "model.layers.28.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.28.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.28.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.28.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 490 }, "model.layers.28.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1084 }, "model.layers.28.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 494 }, "model.layers.28.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1161 }, "model.layers.29.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.29.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.29.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.29.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 374 }, "model.layers.29.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 955 }, "model.layers.29.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 411 }, "model.layers.29.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1123 }, "model.layers.3.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.3.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.3.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.3.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 704 }, "model.layers.3.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1196 }, "model.layers.3.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 698 }, "model.layers.3.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.30.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.30.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.30.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.30.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 282 }, "model.layers.30.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 967 }, "model.layers.30.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 297 }, "model.layers.30.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1146 }, "model.layers.31.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.31.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.31.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.31.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 337 }, "model.layers.31.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 852 }, "model.layers.31.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 341 }, "model.layers.31.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1060 }, "model.layers.4.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.4.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.4.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.4.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 834 }, "model.layers.4.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.4.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 815 }, "model.layers.4.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.5.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.5.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2259 }, "model.layers.5.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.5.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 810 }, "model.layers.5.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1354 }, "model.layers.5.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 843 }, "model.layers.5.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.6.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.6.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2239 }, "model.layers.6.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.6.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 793 }, "model.layers.6.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.6.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 890 }, "model.layers.6.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.7.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.7.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2060 }, "model.layers.7.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.7.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 727 }, "model.layers.7.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.7.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 736 }, "model.layers.7.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.8.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.8.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2007 }, "model.layers.8.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.8.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 831 }, "model.layers.8.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1477 }, "model.layers.8.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 823 }, "model.layers.8.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 0 }, "model.layers.9.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.9.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1995 }, "model.layers.9.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.9.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 822 }, "model.layers.9.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1403 }, "model.layers.9.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 873 }, "model.layers.9.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1321 } }, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.43.2", "use_cache": true, "vocab_size": 32000 }