{ "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv", "architectures": [ "LlamaForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 1, "eos_token_id": 2, "hidden_act": "silu", "hidden_size": 4096, "initializer_range": 0.02, "intermediate_size": 11008, "linear_info": { "model.layers.0.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2318 }, "model.layers.0.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1267 }, "model.layers.0.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1251 }, "model.layers.0.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 212 }, "model.layers.0.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 670 }, "model.layers.0.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 178 }, "model.layers.0.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1109 }, "model.layers.1.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2370 }, "model.layers.1.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1712 }, "model.layers.1.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1748 }, "model.layers.1.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 }, "model.layers.1.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1250 }, "model.layers.1.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 376 }, "model.layers.1.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1195 }, "model.layers.10.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1980 }, "model.layers.10.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1607 }, "model.layers.10.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2033 }, "model.layers.10.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 631 }, "model.layers.10.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1141 }, "model.layers.10.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 686 }, "model.layers.10.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1153 }, "model.layers.11.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2249 }, "model.layers.11.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1764 }, "model.layers.11.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2106 }, "model.layers.11.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 551 }, "model.layers.11.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1329 }, "model.layers.11.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 568 }, "model.layers.11.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1569 }, "model.layers.12.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2298 }, "model.layers.12.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1699 }, "model.layers.12.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2100 }, "model.layers.12.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 604 }, "model.layers.12.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1252 }, "model.layers.12.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 696 }, "model.layers.12.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1318 }, "model.layers.13.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2335 }, "model.layers.13.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1761 }, "model.layers.13.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2126 }, "model.layers.13.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 534 }, "model.layers.13.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 867 }, "model.layers.13.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 611 }, "model.layers.13.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1152 }, "model.layers.14.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2344 }, "model.layers.14.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1835 }, "model.layers.14.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2151 }, "model.layers.14.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 658 }, "model.layers.14.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1202 }, "model.layers.14.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 702 }, "model.layers.14.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1406 }, "model.layers.15.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2409 }, "model.layers.15.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1903 }, "model.layers.15.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2155 }, "model.layers.15.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 572 }, "model.layers.15.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1327 }, "model.layers.15.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 688 }, "model.layers.15.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1472 }, "model.layers.16.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.16.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1985 }, "model.layers.16.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2214 }, "model.layers.16.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 613 }, "model.layers.16.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1246 }, "model.layers.16.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 665 }, "model.layers.16.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1421 }, "model.layers.17.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.17.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1962 }, "model.layers.17.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2190 }, "model.layers.17.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 612 }, "model.layers.17.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1292 }, "model.layers.17.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 684 }, "model.layers.17.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1394 }, "model.layers.18.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.18.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1990 }, "model.layers.18.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2188 }, "model.layers.18.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 520 }, "model.layers.18.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1370 }, "model.layers.18.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 555 }, "model.layers.18.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1391 }, "model.layers.19.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.19.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1969 }, "model.layers.19.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2140 }, "model.layers.19.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 480 }, "model.layers.19.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 956 }, "model.layers.19.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 517 }, "model.layers.19.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1170 }, "model.layers.2.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.2.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1771 }, "model.layers.2.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1985 }, "model.layers.2.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 571 }, "model.layers.2.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1601 }, "model.layers.2.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 512 }, "model.layers.2.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1451 }, "model.layers.20.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.20.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1915 }, "model.layers.20.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2126 }, "model.layers.20.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 513 }, "model.layers.20.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1027 }, "model.layers.20.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 562 }, "model.layers.20.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1215 }, "model.layers.21.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2460 }, "model.layers.21.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1922 }, "model.layers.21.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2074 }, "model.layers.21.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 368 }, "model.layers.21.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 900 }, "model.layers.21.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 374 }, "model.layers.21.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1093 }, "model.layers.22.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2339 }, "model.layers.22.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1911 }, "model.layers.22.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2050 }, "model.layers.22.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 335 }, "model.layers.22.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 533 }, "model.layers.22.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 352 }, "model.layers.22.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 766 }, "model.layers.23.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2237 }, "model.layers.23.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1903 }, "model.layers.23.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2040 }, "model.layers.23.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 372 }, "model.layers.23.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 647 }, "model.layers.23.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 343 }, "model.layers.23.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 827 }, "model.layers.24.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2129 }, "model.layers.24.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1912 }, "model.layers.24.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2049 }, "model.layers.24.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 253 }, "model.layers.24.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 447 }, "model.layers.24.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 250 }, "model.layers.24.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 753 }, "model.layers.25.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2121 }, "model.layers.25.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1976 }, "model.layers.25.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2068 }, "model.layers.25.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 206 }, "model.layers.25.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 }, "model.layers.25.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 247 }, "model.layers.25.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 700 }, "model.layers.26.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2077 }, "model.layers.26.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1994 }, "model.layers.26.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2075 }, "model.layers.26.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 194 }, "model.layers.26.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 387 }, "model.layers.26.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 227 }, "model.layers.26.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 710 }, "model.layers.27.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2056 }, "model.layers.27.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2029 }, "model.layers.27.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2098 }, "model.layers.27.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 172 }, "model.layers.27.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 279 }, "model.layers.27.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 190 }, "model.layers.27.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 579 }, "model.layers.28.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2102 }, "model.layers.28.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2085 }, "model.layers.28.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2179 }, "model.layers.28.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 281 }, "model.layers.28.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 529 }, "model.layers.28.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 291 }, "model.layers.28.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 773 }, "model.layers.29.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2235 }, "model.layers.29.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2205 }, "model.layers.29.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2324 }, "model.layers.29.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 157 }, "model.layers.29.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 381 }, "model.layers.29.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 218 }, "model.layers.29.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 643 }, "model.layers.3.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2278 }, "model.layers.3.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2094 }, "model.layers.3.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2251 }, "model.layers.3.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 542 }, "model.layers.3.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 920 }, "model.layers.3.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 534 }, "model.layers.3.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1283 }, "model.layers.30.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2449 }, "model.layers.30.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2307 }, "model.layers.30.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2394 }, "model.layers.30.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 143 }, "model.layers.30.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 438 }, "model.layers.30.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 166 }, "model.layers.30.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 652 }, "model.layers.31.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 }, "model.layers.31.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2427 }, "model.layers.31.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 }, "model.layers.31.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 211 }, "model.layers.31.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 516 }, "model.layers.31.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 205 }, "model.layers.31.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 713 }, "model.layers.4.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2198 }, "model.layers.4.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1964 }, "model.layers.4.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2219 }, "model.layers.4.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 658 }, "model.layers.4.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1302 }, "model.layers.4.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 640 }, "model.layers.4.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1316 }, "model.layers.5.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2294 }, "model.layers.5.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1899 }, "model.layers.5.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2332 }, "model.layers.5.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 638 }, "model.layers.5.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1002 }, "model.layers.5.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 654 }, "model.layers.5.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1228 }, "model.layers.6.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2214 }, "model.layers.6.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1834 }, "model.layers.6.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2277 }, "model.layers.6.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 642 }, "model.layers.6.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1225 }, "model.layers.6.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 717 }, "model.layers.6.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1232 }, "model.layers.7.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2072 }, "model.layers.7.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1678 }, "model.layers.7.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2130 }, "model.layers.7.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 564 }, "model.layers.7.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1167 }, "model.layers.7.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 557 }, "model.layers.7.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1256 }, "model.layers.8.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2066 }, "model.layers.8.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1599 }, "model.layers.8.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2113 }, "model.layers.8.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 659 }, "model.layers.8.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1130 }, "model.layers.8.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 650 }, "model.layers.8.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1292 }, "model.layers.9.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2057 }, "model.layers.9.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1606 }, "model.layers.9.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2054 }, "model.layers.9.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 633 }, "model.layers.9.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1003 }, "model.layers.9.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 662 }, "model.layers.9.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1049 } }, "max_position_embeddings": 2048, "mlp_bias": false, "model_type": "llama", "num_attention_heads": 32, "num_hidden_layers": 32, "num_key_value_heads": 32, "pad_token_id": 0, "pretraining_tp": 1, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 10000.0, "tie_word_embeddings": false, "torch_dtype": "bfloat16", "transformers_version": "4.43.2", "use_cache": true, "vocab_size": 32000 }