belztjti committed
Commit b4bd1fd · verified · 1 Parent(s): 8e27825

Update config.json

Files changed (1)
  1. config.json +20 -37
config.json CHANGED
@@ -1,45 +1,28 @@
 {
-  "_name_or_path": "THUDM/glm-4-9b-chat-1m",
-  "model_type": "chatglm",
+  "_name_or_path": "test",
   "architectures": [
-    "ChatGLMModel"
+    "LlamaForCausalLM"
   ],
-  "auto_map": {
-    "AutoConfig": "configuration_chatglm.ChatGLMConfig",
-    "AutoModel": "modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForCausalLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSeq2SeqLM": "modeling_chatglm.ChatGLMForConditionalGeneration",
-    "AutoModelForSequenceClassification": "modeling_chatglm.ChatGLMForSequenceClassification"
-  },
-  "add_bias_linear": false,
-  "add_qkv_bias": true,
-  "apply_query_key_layer_scaling": true,
-  "apply_residual_connection_post_layernorm": false,
+  "attention_bias": false,
   "attention_dropout": 0.0,
-  "attention_softmax_in_fp32": true,
-  "attn_implementation": "sdpa",
-  "bias_dropout_fusion": true,
-  "ffn_hidden_size": 13696,
-  "fp32_residual_connection": false,
-  "hidden_dropout": 0.0,
+  "bos_token_id": 128000,
+  "eos_token_id": 128040,
+  "hidden_act": "silu",
   "hidden_size": 4096,
-  "kv_channels": 128,
-  "layernorm_epsilon": 1.5625e-07,
-  "multi_query_attention": true,
-  "multi_query_group_num": 4,
+  "initializer_range": 0.02,
+  "intermediate_size": 13696,
+  "max_position_embeddings": 1048576,
+  "mlp_bias": false,
+  "model_type": "llama",
   "num_attention_heads": 32,
   "num_hidden_layers": 40,
-  "num_layers": 40,
-  "rope_ratio": 10000,
-  "original_rope": true,
-  "padded_vocab_size": 151552,
-  "post_layer_norm": true,
-  "rmsnorm": true,
-  "seq_length": 1048576,
-  "use_cache": true,
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.42.4",
+  "num_key_value_heads": 8,
+  "pretraining_tp": 1,
+  "rms_norm_eps": 1e-05,
+  "rope_theta": 500000.0,
   "tie_word_embeddings": false,
-  "eos_token_id": [151329, 151336, 151338],
-  "pad_token_id": 151329
-}
+  "torch_dtype": "bfloat16",
+  "transformers_version": "4.44.0.dev0",
+  "use_cache": true,
+  "vocab_size": 151552
+}
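
As a quick sanity check (not part of this commit), the rewritten file can be parsed with the transformers library as a stock LlamaConfig instead of relying on the removed auto_map remote-code hooks. The sketch below assumes a local copy of the updated file at ./config.json; it only verifies a few fields taken directly from the diff.

# Sketch only: load the updated config.json locally and confirm it parses as a
# plain LlamaConfig. The path "config.json" is an assumption for illustration.
from transformers import LlamaConfig

config = LlamaConfig.from_json_file("config.json")

# Values below come straight from the diff: Llama architecture with grouped-query
# attention (32 query heads, 8 key/value heads), a 13696-wide MLP, and a
# 1,048,576-token context window.
assert config.model_type == "llama"
assert config.num_attention_heads == 32
assert config.num_key_value_heads == 8
assert config.intermediate_size == 13696
assert config.max_position_embeddings == 1048576
assert config.vocab_size == 151552

print(config.architectures)  # ['LlamaForCausalLM']
print(config.rope_theta)     # 500000.0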