{
  "attn_cfg": {
    "causal": true,
    "head_dim": 64,
    "num_heads": 48,
    "out_proj_bias": true,
    "qkv_proj_bias": true,
    "sliding_window_length": 2048
  },
  "attn_layer_idx": [
    6,
    18,
    30,
    42
  ],
  "d_model": 2048,
  "eos_token_id": 50279,
  "mlp_cfg": {},
  "mlp_layer_idx": [
    2,
    5,
    8,
    11,
    14,
    17,
    20,
    23,
    26,
    29,
    32,
    35,
    38,
    41,
    44,
    47
  ],
  "model_type": "rene",
  "n_layer": 48,
  "pad_token_id": 1,
  "pad_vocab_size_multiple": 16,
  "residual_in_fp32": true,
  "rms_norm": true,
  "ssm_cfg": {
    "norm_before_gate": true
  },
  "tie_word_embeddings": true,
  "vocab_size": 50280
}