smallcloudteam committed on
Commit
5cc155f
·
1 Parent(s): f54e655

Upload config

Browse files
Files changed (2) hide show
  1. config.json +58 -0
  2. configuration_codify.py +152 -0
config.json ADDED
@@ -0,0 +1,58 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "E": 2560,
3
+ "L": 32,
4
+ "T": 2048,
5
+ "_mup": true,
6
+ "alt_pw_klass": {
7
+ "type": ""
8
+ },
9
+ "alt_rel_klass": {
10
+ "fused": true,
11
+ "type": "alibi"
12
+ },
13
+ "alt_sa_klass": {
14
+ "triton": true,
15
+ "type": "flash",
16
+ "use_rotary_emb": null
17
+ },
18
+ "attn_a_reach": 2048,
19
+ "attn_b_reach": 2048,
20
+ "attn_heads": 40,
21
+ "attn_ra_nbasis": 64,
22
+ "attn_seq": [
23
+ "d"
24
+ ],
25
+ "attn_sparse_layout_seq": null,
26
+ "auto_map": {
27
+ "AutoConfig": "configuration_codify.CodifyConfig"
28
+ },
29
+ "backcheck_pw": "inside",
30
+ "backcheck_sa": "none",
31
+ "bos_token_id": 1,
32
+ "dtype_acts": "torch.float16",
33
+ "dtype_weights": "torch.float16",
34
+ "enc_name": "openai_programming_v2",
35
+ "eos_token_id": 2,
36
+ "init_scale": 1,
37
+ "initializer_range": 0.02,
38
+ "layer_norm_epsilon": 1e-05,
39
+ "mlp_mult": 4,
40
+ "model_type": "codify",
41
+ "moe": null,
42
+ "mup_optimal_lr": 0.0005,
43
+ "mup_shapes_file": "lean_former/mup/alibi_32l/shapes.json",
44
+ "posemb": false,
45
+ "rescale_embeddings": false,
46
+ "tie_word_embeddings": false,
47
+ "transformers_version": "4.24.0",
48
+ "tune": [
49
+ 3,
50
+ 3,
51
+ 3,
52
+ 3
53
+ ],
54
+ "unembedding_shared": false,
55
+ "use_cache": true,
56
+ "use_res_scale": false,
57
+ "vocab_size": 51305
58
+ }
configuration_codify.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+ from typing import TYPE_CHECKING, Any, List, Mapping, Optional
3
+
4
+ from packaging import version
5
+
6
+ from transformers import is_torch_available
7
+
8
+ if TYPE_CHECKING:
9
+ from transformers import PreTrainedTokenizer, TensorType
10
+
11
+ from transformers.configuration_utils import PretrainedConfig
12
+ from transformers.onnx import OnnxConfigWithPast, PatchingSpec
13
+ from transformers.utils import logging
14
+
15
+ logger = logging.get_logger(__name__)
16
+
17
# Maps each hosted Codify checkpoint name to the URL of its config.json.
# The URLs use the Hub's ``resolve`` route, which serves the raw file; the
# ``blob`` route returns the HTML file-viewer page and cannot be parsed as JSON.
CODIFY_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "smallcloudai/codify_medium_multi": "https://huggingface.co/smallcloudai/codify_medium_multi/resolve/main/config.json",
    "smallcloudai/codify_3b_multi": "https://huggingface.co/smallcloudai/codify_3b_multi/resolve/main/config.json",
}
21
+
22
+
23
class CodifyConfig(PretrainedConfig):
    """Configuration object for models of type ``"codify"``.

    Only a handful of hyper-parameters are explicit constructor arguments.
    Anything else is forwarded untouched to ``PretrainedConfig.__init__``
    through ``**kwargs``. ``attribute_map`` aliases the conventional names
    ``num_hidden_layers``, ``num_attention_heads`` and ``hidden_size`` to the
    short attribute names ``L``, ``attn_heads`` and ``E``.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        layer_norm_epsilon: Stored as ``self.layer_norm_epsilon``.
        initializer_range: Stored as ``self.initializer_range``.
        use_cache: Stored as ``self.use_cache``.
        bos_token_id: Stored on the instance and passed to the base class.
        eos_token_id: Stored on the instance and passed to the base class.
        mlp_mult: Stored as ``self.mlp_mult``.
        tie_word_embeddings: Passed to the base class.
        **kwargs: Forwarded to ``PretrainedConfig.__init__``.
    """

    model_type = "codify"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = dict(
        num_hidden_layers="L",
        num_attention_heads="attn_heads",
        hidden_size="E",
    )

    def __init__(
        self,
        vocab_size=51305,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        mlp_mult=4,
        tie_word_embeddings=False,
        **kwargs,
    ):
        # Assigned before the base-class constructor runs, in this exact order.
        own_fields = {
            "vocab_size": vocab_size,
            "mlp_mult": mlp_mult,
            "layer_norm_epsilon": layer_norm_epsilon,
            "initializer_range": initializer_range,
            "use_cache": use_cache,
            "bos_token_id": bos_token_id,
            "eos_token_id": eos_token_id,
        }
        for field_name, field_value in own_fields.items():
            setattr(self, field_name, field_value)

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
55
+
56
+
57
class CodifyOnnxConfig(OnnxConfigWithPast):
    """ONNX export configuration for Codify models, with optional past key/values.

    Layer and head counts are read from the wrapped model config through the
    standard names ``num_hidden_layers`` and ``num_attention_heads``, which
    ``CodifyConfig.attribute_map`` aliases to ``L`` and ``attn_heads``.
    """

    torch_onnx_minimum_version = version.parse("1.12")

    def __init__(
        self,
        config: PretrainedConfig,
        task: str = "default",
        patching_specs: Optional[List[PatchingSpec]] = None,
        use_past: bool = False,
    ):
        """Wrap ``config`` for export and make sure it has a pad token id.

        Args:
            config: Model configuration to export.
            task: Export task name, forwarded to the base class.
            patching_specs: Optional patching specs, forwarded to the base class.
            use_past: Whether past key/values are part of the exported inputs.
        """
        super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past)
        if not getattr(self._config, "pad_token_id", None):
            # TODO: how to do that better?
            self._config.pad_token_id = 0

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """Return the ordered model inputs with the names of their dynamic axes."""
        common_inputs = OrderedDict({"input_ids": {0: "batch", 1: "sequence"}})
        if self.use_past:
            # Layout inherited from the BLOOM export config, where values sit on dynamic axis 2
            # (see https://github.com/huggingface/transformers/pull/18344).
            # NOTE(review): assumes Codify caches past key/values the same way -- confirm.
            self.fill_with_past_key_values_(common_inputs, direction="inputs", inverted_values_shape=True)
            common_inputs["attention_mask"] = {0: "batch", 1: "past_sequence + sequence"}
        else:
            common_inputs["attention_mask"] = {0: "batch", 1: "sequence"}

        return common_inputs

    @property
    def num_layers(self) -> int:
        """Number of layers, read from ``num_hidden_layers`` on the model config."""
        return self._config.num_hidden_layers

    @property
    def num_attention_heads(self) -> int:
        """Number of attention heads, read from ``num_attention_heads`` on the model config."""
        # CodifyConfig defines no ``n_head`` attribute; its attribute_map only
        # aliases ``num_attention_heads`` (to ``attn_heads``), so use that name.
        return self._config.num_attention_heads

    @property
    def atol_for_validation(self) -> float:
        """Absolute tolerance used when validating exported model outputs."""
        return 1e-3

    def generate_dummy_inputs(
        self,
        tokenizer: "PreTrainedTokenizer",
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
        framework: Optional["TensorType"] = None,
    ) -> Mapping[str, Any]:
        """Build dummy inputs for export, ordered as ``input_ids``, ``past_key_values``, ``attention_mask``.

        Args:
            tokenizer: Tokenizer used by the parent implementation to create the base inputs.
            batch_size: Forwarded to the parent implementation.
            seq_length: Forwarded to the parent implementation.
            is_pair: Forwarded to the parent implementation.
            framework: Forwarded to the parent implementation.

        Returns:
            An ordered mapping of input names to dummy values. ``past_key_values``
            is only present when ``use_past`` is enabled.

        Raises:
            ValueError: If ``use_past`` is enabled and PyTorch is not installed.
        """
        # Deliberately skip OnnxConfigWithPast's own implementation and use the
        # next one in the MRO; the past key/values are built by hand below.
        common_inputs = super(OnnxConfigWithPast, self).generate_dummy_inputs(
            tokenizer, batch_size=batch_size, seq_length=seq_length, is_pair=is_pair, framework=framework
        )

        # We need to order the inputs in the way they appear in forward()
        ordered_inputs = OrderedDict({"input_ids": common_inputs["input_ids"]})

        # Need to add the past_keys
        if self.use_past:
            if not is_torch_available():
                raise ValueError("Cannot generate dummy past_keys inputs without PyTorch installed.")
            else:
                import torch

                batch, seqlen = common_inputs["input_ids"].shape
                # Not using the same length for past_key_values
                past_key_values_length = seqlen + 2
                head_dim = self._config.hidden_size // self.num_attention_heads
                # Keys and values use transposed trailing dimensions.
                # NOTE(review): shapes follow the BLOOM export config -- confirm they match Codify's cache.
                past_key_shape = (
                    batch * self.num_attention_heads,
                    head_dim,
                    past_key_values_length,
                )
                past_value_shape = (
                    batch * self.num_attention_heads,
                    past_key_values_length,
                    head_dim,
                )
                ordered_inputs["past_key_values"] = [
                    (torch.zeros(past_key_shape), torch.zeros(past_value_shape)) for _ in range(self.num_layers)
                ]

        ordered_inputs["attention_mask"] = common_inputs["attention_mask"]
        if self.use_past:
            # Extend the mask with ones so it also covers the dummy past positions.
            mask_dtype = ordered_inputs["attention_mask"].dtype
            ordered_inputs["attention_mask"] = torch.cat(
                [ordered_inputs["attention_mask"], torch.ones(batch, past_key_values_length, dtype=mask_dtype)], dim=1
            )

        return ordered_inputs

    @property
    def default_onnx_opset(self) -> int:
        """Default ONNX opset version used for export."""
        return 13
148
+
149
+
150
# Register CodifyConfig with AutoConfig under CodifyConfig.model_type. The import sits here, next to its only use, rather than in the top-of-file import group.
+ from transformers import AutoConfig
151
+
152
+ AutoConfig.register(CodifyConfig.model_type, CodifyConfig)