|
{ |
|
"_name_or_path": "/mnt/dolphinfs/ssd_pool/docker/user/hadoop-vacv/lidengjie/projects/fsvd-llm/cache/llama-7b-hf/models/stage1-bf16-usv", |
|
"architectures": [ |
|
"LlamaForCausalLM" |
|
], |
|
"attention_bias": false, |
|
"attention_dropout": 0.0, |
|
"bos_token_id": 1, |
|
"eos_token_id": 2, |
|
"hidden_act": "silu", |
|
"hidden_size": 4096, |
|
"initializer_range": 0.02, |
|
"intermediate_size": 11008, |
|
"linear_info": { |
|
"model.layers.0.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 2023 |
|
}, |
|
"model.layers.0.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1124 |
|
}, |
|
"model.layers.0.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1139 |
|
}, |
|
"model.layers.0.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 94 |
|
}, |
|
"model.layers.0.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 538 |
|
}, |
|
"model.layers.0.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 83 |
|
}, |
|
"model.layers.0.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1063 |
|
}, |
|
"model.layers.1.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1644 |
|
}, |
|
"model.layers.1.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1288 |
|
}, |
|
"model.layers.1.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1319 |
|
}, |
|
"model.layers.1.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 207 |
|
}, |
|
"model.layers.1.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 945 |
|
}, |
|
"model.layers.1.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 211 |
|
}, |
|
"model.layers.1.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 762 |
|
}, |
|
"model.layers.10.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 193 |
|
}, |
|
"model.layers.10.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 162 |
|
}, |
|
"model.layers.10.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 196 |
|
}, |
|
"model.layers.10.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 94 |
|
}, |
|
"model.layers.10.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 32 |
|
}, |
|
"model.layers.10.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 113 |
|
}, |
|
"model.layers.10.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 99 |
|
}, |
|
"model.layers.11.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 273 |
|
}, |
|
"model.layers.11.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 219 |
|
}, |
|
"model.layers.11.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 268 |
|
}, |
|
"model.layers.11.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 54 |
|
}, |
|
"model.layers.11.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 34 |
|
}, |
|
"model.layers.11.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 62 |
|
}, |
|
"model.layers.11.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 133 |
|
}, |
|
"model.layers.12.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 46 |
|
}, |
|
"model.layers.12.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 84 |
|
}, |
|
"model.layers.12.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 66 |
|
}, |
|
"model.layers.12.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 100 |
|
}, |
|
"model.layers.12.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 63 |
|
}, |
|
"model.layers.12.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 115 |
|
}, |
|
"model.layers.12.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 156 |
|
}, |
|
"model.layers.13.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 270 |
|
}, |
|
"model.layers.13.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 247 |
|
}, |
|
"model.layers.13.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 278 |
|
}, |
|
"model.layers.13.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 40 |
|
}, |
|
"model.layers.13.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.13.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 42 |
|
}, |
|
"model.layers.13.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 55 |
|
}, |
|
"model.layers.14.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 678 |
|
}, |
|
"model.layers.14.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 467 |
|
}, |
|
"model.layers.14.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 613 |
|
}, |
|
"model.layers.14.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 271 |
|
}, |
|
"model.layers.14.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 702 |
|
}, |
|
"model.layers.14.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 265 |
|
}, |
|
"model.layers.14.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 682 |
|
}, |
|
"model.layers.15.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 996 |
|
}, |
|
"model.layers.15.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 665 |
|
}, |
|
"model.layers.15.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 826 |
|
}, |
|
"model.layers.15.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 126 |
|
}, |
|
"model.layers.15.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 359 |
|
}, |
|
"model.layers.15.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 152 |
|
}, |
|
"model.layers.15.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 455 |
|
}, |
|
"model.layers.16.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1673 |
|
}, |
|
"model.layers.16.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 978 |
|
}, |
|
"model.layers.16.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1109 |
|
}, |
|
"model.layers.16.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 128 |
|
}, |
|
"model.layers.16.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 504 |
|
}, |
|
"model.layers.16.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 159 |
|
}, |
|
"model.layers.16.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 469 |
|
}, |
|
"model.layers.17.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1545 |
|
}, |
|
"model.layers.17.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 905 |
|
}, |
|
"model.layers.17.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1087 |
|
}, |
|
"model.layers.17.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 262 |
|
}, |
|
"model.layers.17.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1038 |
|
}, |
|
"model.layers.17.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 327 |
|
}, |
|
"model.layers.17.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 785 |
|
}, |
|
"model.layers.18.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1414 |
|
}, |
|
"model.layers.18.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 851 |
|
}, |
|
"model.layers.18.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 984 |
|
}, |
|
"model.layers.18.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 298 |
|
}, |
|
"model.layers.18.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1130 |
|
}, |
|
"model.layers.18.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 285 |
|
}, |
|
"model.layers.18.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 857 |
|
}, |
|
"model.layers.19.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1348 |
|
}, |
|
"model.layers.19.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 819 |
|
}, |
|
"model.layers.19.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 999 |
|
}, |
|
"model.layers.19.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 160 |
|
}, |
|
"model.layers.19.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 648 |
|
}, |
|
"model.layers.19.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 151 |
|
}, |
|
"model.layers.19.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 497 |
|
}, |
|
"model.layers.2.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1450 |
|
}, |
|
"model.layers.2.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1014 |
|
}, |
|
"model.layers.2.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1167 |
|
}, |
|
"model.layers.2.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 257 |
|
}, |
|
"model.layers.2.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1176 |
|
}, |
|
"model.layers.2.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 205 |
|
}, |
|
"model.layers.2.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 862 |
|
}, |
|
"model.layers.20.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1412 |
|
}, |
|
"model.layers.20.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 844 |
|
}, |
|
"model.layers.20.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 965 |
|
}, |
|
"model.layers.20.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 151 |
|
}, |
|
"model.layers.20.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 685 |
|
}, |
|
"model.layers.20.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 166 |
|
}, |
|
"model.layers.20.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 587 |
|
}, |
|
"model.layers.21.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1355 |
|
}, |
|
"model.layers.21.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 833 |
|
}, |
|
"model.layers.21.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 908 |
|
}, |
|
"model.layers.21.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 137 |
|
}, |
|
"model.layers.21.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 472 |
|
}, |
|
"model.layers.21.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 124 |
|
}, |
|
"model.layers.21.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 393 |
|
}, |
|
"model.layers.22.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1113 |
|
}, |
|
"model.layers.22.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 667 |
|
}, |
|
"model.layers.22.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 737 |
|
}, |
|
"model.layers.22.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 92 |
|
}, |
|
"model.layers.22.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 130 |
|
}, |
|
"model.layers.22.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 89 |
|
}, |
|
"model.layers.22.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 150 |
|
}, |
|
"model.layers.23.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 856 |
|
}, |
|
"model.layers.23.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 478 |
|
}, |
|
"model.layers.23.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 586 |
|
}, |
|
"model.layers.23.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 168 |
|
}, |
|
"model.layers.23.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 516 |
|
}, |
|
"model.layers.23.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 144 |
|
}, |
|
"model.layers.23.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 356 |
|
}, |
|
"model.layers.24.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 435 |
|
}, |
|
"model.layers.24.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 242 |
|
}, |
|
"model.layers.24.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 319 |
|
}, |
|
"model.layers.24.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 96 |
|
}, |
|
"model.layers.24.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 440 |
|
}, |
|
"model.layers.24.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 93 |
|
}, |
|
"model.layers.24.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 297 |
|
}, |
|
"model.layers.25.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 147 |
|
}, |
|
"model.layers.25.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 121 |
|
}, |
|
"model.layers.25.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 122 |
|
}, |
|
"model.layers.25.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 11 |
|
}, |
|
"model.layers.25.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 7 |
|
}, |
|
"model.layers.25.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 14 |
|
}, |
|
"model.layers.25.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 13 |
|
}, |
|
"model.layers.26.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 17 |
|
}, |
|
"model.layers.26.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 36 |
|
}, |
|
"model.layers.26.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 20 |
|
}, |
|
"model.layers.26.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 2 |
|
}, |
|
"model.layers.26.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.26.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 4 |
|
}, |
|
"model.layers.26.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.27.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.27.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 15 |
|
}, |
|
"model.layers.27.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 6 |
|
}, |
|
"model.layers.27.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 5 |
|
}, |
|
"model.layers.27.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.27.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 4 |
|
}, |
|
"model.layers.27.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 8 |
|
}, |
|
"model.layers.28.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 304 |
|
}, |
|
"model.layers.28.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 248 |
|
}, |
|
"model.layers.28.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 213 |
|
}, |
|
"model.layers.28.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 65 |
|
}, |
|
"model.layers.28.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 174 |
|
}, |
|
"model.layers.28.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 80 |
|
}, |
|
"model.layers.28.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 238 |
|
}, |
|
"model.layers.29.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 436 |
|
}, |
|
"model.layers.29.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 430 |
|
}, |
|
"model.layers.29.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 334 |
|
}, |
|
"model.layers.29.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.29.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 1 |
|
}, |
|
"model.layers.29.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 2 |
|
}, |
|
"model.layers.29.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 2 |
|
}, |
|
"model.layers.3.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 1216 |
|
}, |
|
"model.layers.3.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1042 |
|
}, |
|
"model.layers.3.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1217 |
|
}, |
|
"model.layers.3.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 228 |
|
}, |
|
"model.layers.3.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 627 |
|
}, |
|
"model.layers.3.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 203 |
|
}, |
|
"model.layers.3.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 574 |
|
}, |
|
"model.layers.30.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 973 |
|
}, |
|
"model.layers.30.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 747 |
|
}, |
|
"model.layers.30.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 683 |
|
}, |
|
"model.layers.30.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 14 |
|
}, |
|
"model.layers.30.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 10 |
|
}, |
|
"model.layers.30.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 14 |
|
}, |
|
"model.layers.30.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 22 |
|
}, |
|
"model.layers.31.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 2035 |
|
}, |
|
"model.layers.31.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1270 |
|
}, |
|
"model.layers.31.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1335 |
|
}, |
|
"model.layers.31.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 59 |
|
}, |
|
"model.layers.31.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 214 |
|
}, |
|
"model.layers.31.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 60 |
|
}, |
|
"model.layers.31.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 284 |
|
}, |
|
"model.layers.4.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 960 |
|
}, |
|
"model.layers.4.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 730 |
|
}, |
|
"model.layers.4.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 962 |
|
}, |
|
"model.layers.4.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 271 |
|
}, |
|
"model.layers.4.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 966 |
|
}, |
|
"model.layers.4.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 266 |
|
}, |
|
"model.layers.4.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 699 |
|
}, |
|
"model.layers.5.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 906 |
|
}, |
|
"model.layers.5.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 654 |
|
}, |
|
"model.layers.5.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 1019 |
|
}, |
|
"model.layers.5.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 243 |
|
}, |
|
"model.layers.5.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 503 |
|
}, |
|
"model.layers.5.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 233 |
|
}, |
|
"model.layers.5.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 470 |
|
}, |
|
"model.layers.6.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 740 |
|
}, |
|
"model.layers.6.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 535 |
|
}, |
|
"model.layers.6.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 826 |
|
}, |
|
"model.layers.6.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 248 |
|
}, |
|
"model.layers.6.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 456 |
|
}, |
|
"model.layers.6.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 237 |
|
}, |
|
"model.layers.6.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 370 |
|
}, |
|
"model.layers.7.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 437 |
|
}, |
|
"model.layers.7.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 317 |
|
}, |
|
"model.layers.7.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 494 |
|
}, |
|
"model.layers.7.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 244 |
|
}, |
|
"model.layers.7.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 625 |
|
}, |
|
"model.layers.7.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 251 |
|
}, |
|
"model.layers.7.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 545 |
|
}, |
|
"model.layers.8.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 201 |
|
}, |
|
"model.layers.8.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 164 |
|
}, |
|
"model.layers.8.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 203 |
|
}, |
|
"model.layers.8.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 167 |
|
}, |
|
"model.layers.8.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 195 |
|
}, |
|
"model.layers.8.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 149 |
|
}, |
|
"model.layers.8.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 287 |
|
}, |
|
"model.layers.9.mlp.down_proj": { |
|
"bias": true, |
|
"in_features": 11008, |
|
"out_features": 4096, |
|
"rank": 93 |
|
}, |
|
"model.layers.9.mlp.gate_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 118 |
|
}, |
|
"model.layers.9.mlp.up_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 11008, |
|
"rank": 89 |
|
}, |
|
"model.layers.9.self_attn.k_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 212 |
|
}, |
|
"model.layers.9.self_attn.o_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 284 |
|
}, |
|
"model.layers.9.self_attn.q_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 199 |
|
}, |
|
"model.layers.9.self_attn.v_proj": { |
|
"bias": true, |
|
"in_features": 4096, |
|
"out_features": 4096, |
|
"rank": 235 |
|
} |
|
}, |
|
"max_position_embeddings": 2048, |
|
"mlp_bias": false, |
|
"model_type": "llama", |
|
"num_attention_heads": 32, |
|
"num_hidden_layers": 32, |
|
"num_key_value_heads": 32, |
|
"pad_token_id": 0, |
|
"pretraining_tp": 1, |
|
"rms_norm_eps": 1e-06, |
|
"rope_scaling": null, |
|
"rope_theta": 10000.0, |
|
"tie_word_embeddings": false, |
|
"torch_dtype": "bfloat16", |
|
"transformers_version": "4.43.2", |
|
"use_cache": true, |
|
"vocab_size": 32000 |
|
} |
|
|