{
  "_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv",
  "architectures": [
    "LlamaForCausalLM"
  ],
  "attention_bias": false,
  "attention_dropout": 0.0,
  "bos_token_id": 1,
  "eos_token_id": 2,
  "hidden_act": "silu",
  "hidden_size": 4096,
  "initializer_range": 0.02,
  "intermediate_size": 11008,
  "linear_info": {
    "model.layers.0.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2318 },
    "model.layers.0.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1267 },
    "model.layers.0.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1251 },
    "model.layers.0.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 212 },
    "model.layers.0.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 670 },
    "model.layers.0.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 178 },
    "model.layers.0.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1109 },
    "model.layers.1.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2370 },
    "model.layers.1.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1712 },
    "model.layers.1.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1748 },
    "model.layers.1.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 },
    "model.layers.1.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1250 },
    "model.layers.1.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 376 },
    "model.layers.1.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1195 },
    "model.layers.10.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 1980 },
    "model.layers.10.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1607 },
    "model.layers.10.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2033 },
    "model.layers.10.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 631 },
    "model.layers.10.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1141 },
    "model.layers.10.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 686 },
    "model.layers.10.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1153 },
    "model.layers.11.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2249 },
    "model.layers.11.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1764 },
    "model.layers.11.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2106 },
    "model.layers.11.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 551 },
    "model.layers.11.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1329 },
    "model.layers.11.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 568 },
    "model.layers.11.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1569 },
    "model.layers.12.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2298 },
    "model.layers.12.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1699 },
    "model.layers.12.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2100 },
    "model.layers.12.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 604 },
    "model.layers.12.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1252 },
    "model.layers.12.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 696 },
    "model.layers.12.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1318 },
    "model.layers.13.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2335 },
    "model.layers.13.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1761 },
    "model.layers.13.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2126 },
    "model.layers.13.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 534 },
    "model.layers.13.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 867 },
    "model.layers.13.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 611 },
    "model.layers.13.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1152 },
    "model.layers.14.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2344 },
    "model.layers.14.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1835 },
    "model.layers.14.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2151 },
    "model.layers.14.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 658 },
    "model.layers.14.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1202 },
    "model.layers.14.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 702 },
    "model.layers.14.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1406 },
    "model.layers.15.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2409 },
    "model.layers.15.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1903 },
    "model.layers.15.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2155 },
    "model.layers.15.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 572 },
    "model.layers.15.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1327 },
    "model.layers.15.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 688 },
    "model.layers.15.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1472 },
    "model.layers.16.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.16.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1985 },
    "model.layers.16.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2214 },
    "model.layers.16.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 613 },
    "model.layers.16.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1246 },
    "model.layers.16.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 665 },
    "model.layers.16.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1421 },
    "model.layers.17.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.17.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1962 },
    "model.layers.17.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2190 },
    "model.layers.17.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 612 },
    "model.layers.17.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1292 },
    "model.layers.17.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 684 },
    "model.layers.17.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1394 },
    "model.layers.18.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.18.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1990 },
    "model.layers.18.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2188 },
    "model.layers.18.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 520 },
    "model.layers.18.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1370 },
    "model.layers.18.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 555 },
    "model.layers.18.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1391 },
    "model.layers.19.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.19.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1969 },
    "model.layers.19.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2140 },
    "model.layers.19.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 480 },
    "model.layers.19.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 956 },
    "model.layers.19.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 517 },
    "model.layers.19.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1170 },
    "model.layers.2.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.2.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1771 },
    "model.layers.2.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1985 },
    "model.layers.2.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 571 },
    "model.layers.2.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1601 },
    "model.layers.2.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 512 },
    "model.layers.2.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1451 },
    "model.layers.20.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.20.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1915 },
    "model.layers.20.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2126 },
    "model.layers.20.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 513 },
    "model.layers.20.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1027 },
    "model.layers.20.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 562 },
    "model.layers.20.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1215 },
    "model.layers.21.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2460 },
    "model.layers.21.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1922 },
    "model.layers.21.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2074 },
    "model.layers.21.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 368 },
    "model.layers.21.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 900 },
    "model.layers.21.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 374 },
    "model.layers.21.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1093 },
    "model.layers.22.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2339 },
    "model.layers.22.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1911 },
    "model.layers.22.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2050 },
    "model.layers.22.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 335 },
    "model.layers.22.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 533 },
    "model.layers.22.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 352 },
    "model.layers.22.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 766 },
    "model.layers.23.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2237 },
    "model.layers.23.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1903 },
    "model.layers.23.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2040 },
    "model.layers.23.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 372 },
    "model.layers.23.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 647 },
    "model.layers.23.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 343 },
    "model.layers.23.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 827 },
    "model.layers.24.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2129 },
    "model.layers.24.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1912 },
    "model.layers.24.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2049 },
    "model.layers.24.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 253 },
    "model.layers.24.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 447 },
    "model.layers.24.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 250 },
    "model.layers.24.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 753 },
    "model.layers.25.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2121 },
    "model.layers.25.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1976 },
    "model.layers.25.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2068 },
    "model.layers.25.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 206 },
    "model.layers.25.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 375 },
    "model.layers.25.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 247 },
    "model.layers.25.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 700 },
    "model.layers.26.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2077 },
    "model.layers.26.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1994 },
    "model.layers.26.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2075 },
    "model.layers.26.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 194 },
    "model.layers.26.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 387 },
    "model.layers.26.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 227 },
    "model.layers.26.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 710 },
    "model.layers.27.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2056 },
    "model.layers.27.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2029 },
    "model.layers.27.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2098 },
    "model.layers.27.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 172 },
    "model.layers.27.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 279 },
    "model.layers.27.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 190 },
    "model.layers.27.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 579 },
    "model.layers.28.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2102 },
    "model.layers.28.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2085 },
    "model.layers.28.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2179 },
    "model.layers.28.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 281 },
    "model.layers.28.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 529 },
    "model.layers.28.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 291 },
    "model.layers.28.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 773 },
    "model.layers.29.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2235 },
    "model.layers.29.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2205 },
    "model.layers.29.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2324 },
    "model.layers.29.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 157 },
    "model.layers.29.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 381 },
    "model.layers.29.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 218 },
    "model.layers.29.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 643 },
    "model.layers.3.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2278 },
    "model.layers.3.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2094 },
    "model.layers.3.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2251 },
    "model.layers.3.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 542 },
    "model.layers.3.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 920 },
    "model.layers.3.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 534 },
    "model.layers.3.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1283 },
    "model.layers.30.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2449 },
    "model.layers.30.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2307 },
    "model.layers.30.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2394 },
    "model.layers.30.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 143 },
    "model.layers.30.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 438 },
    "model.layers.30.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 166 },
    "model.layers.30.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 652 },
    "model.layers.31.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 0 },
    "model.layers.31.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2427 },
    "model.layers.31.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 0 },
    "model.layers.31.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 211 },
    "model.layers.31.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 516 },
    "model.layers.31.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 205 },
    "model.layers.31.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 713 },
    "model.layers.4.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2198 },
    "model.layers.4.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1964 },
    "model.layers.4.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2219 },
    "model.layers.4.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 658 },
    "model.layers.4.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1302 },
    "model.layers.4.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 640 },
    "model.layers.4.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1316 },
    "model.layers.5.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2294 },
    "model.layers.5.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1899 },
    "model.layers.5.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2332 },
    "model.layers.5.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 638 },
    "model.layers.5.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1002 },
    "model.layers.5.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 654 },
    "model.layers.5.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1228 },
    "model.layers.6.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2214 },
    "model.layers.6.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1834 },
    "model.layers.6.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2277 },
    "model.layers.6.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 642 },
    "model.layers.6.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1225 },
    "model.layers.6.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 717 },
    "model.layers.6.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1232 },
    "model.layers.7.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2072 },
    "model.layers.7.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1678 },
    "model.layers.7.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2130 },
    "model.layers.7.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 564 },
    "model.layers.7.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1167 },
    "model.layers.7.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 557 },
    "model.layers.7.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1256 },
    "model.layers.8.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2066 },
    "model.layers.8.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1599 },
    "model.layers.8.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2113 },
    "model.layers.8.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 659 },
    "model.layers.8.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1130 },
    "model.layers.8.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 650 },
    "model.layers.8.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1292 },
    "model.layers.9.mlp.down_proj": { "bias": true, "in_features": 11008, "out_features": 4096, "rank": 2057 },
    "model.layers.9.mlp.gate_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 1606 },
    "model.layers.9.mlp.up_proj": { "bias": true, "in_features": 4096, "out_features": 11008, "rank": 2054 },
    "model.layers.9.self_attn.k_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 633 },
    "model.layers.9.self_attn.o_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1003 },
    "model.layers.9.self_attn.q_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 662 },
    "model.layers.9.self_attn.v_proj": { "bias": true, "in_features": 4096, "out_features": 4096, "rank": 1049 }
  },
  "max_position_embeddings": 2048,
  "mlp_bias": false,
  "model_type": "llama",
  "num_attention_heads": 32,
  "num_hidden_layers": 32,
  "num_key_value_heads": 32,
  "pad_token_id": 0,
  "pretraining_tp": 1,
  "rms_norm_eps": 1e-06,
  "rope_scaling": null,
  "rope_theta": 10000.0,
  "tie_word_embeddings": false,
  "torch_dtype": "bfloat16",
  "transformers_version": "4.43.2",
  "use_cache": true,
  "vocab_size": 32000
}
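
The `linear_info` block records, for each linear projection in the model, its dense shape and the rank kept for that module. The sketch below shows one way such a config could be inspected to estimate per-module and overall compression. It is only an illustration: the two-factor parameter count `rank * (in_features + out_features)` and the reading of `rank: 0` as "module left dense" are assumptions, not something this config states, and the file path is a placeholder.

```python
# Minimal sketch: summarize the rank budget recorded in linear_info.
# Assumptions (not stated by the config): a rank-r factorization stores
# roughly r * (in + out) weights, and rank == 0 means the module stays dense.
import json

with open("config.json") as f:  # illustrative path
    cfg = json.load(f)

total_dense, total_factored = 0, 0
for name, info in cfg["linear_info"].items():
    n_in, n_out, r = info["in_features"], info["out_features"], info["rank"]
    dense = n_in * n_out
    factored = dense if r == 0 else r * (n_in + n_out)
    total_dense += dense
    total_factored += factored
    print(f"{name}: rank={r}, {factored}/{dense} params ({factored / dense:.1%})")

print(f"overall linear weights kept: {total_factored / total_dense:.1%}")
```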
|
|