smallcloudteam committed on
Commit
2f812a3
·
1 Parent(s): 12d2162

Upload config

Browse files
Files changed (2) hide show
  1. config.json +3 -0
  2. configuration_codify.py +152 -0
config.json CHANGED
@@ -23,6 +23,9 @@
23
  "d"
24
  ],
25
  "attn_sparse_layout_seq": null,
 
 
 
26
  "backcheck_pw": "none",
27
  "backcheck_sa": "none",
28
  "bos_token_id": 1,
 
23
  "d"
24
  ],
25
  "attn_sparse_layout_seq": null,
26
+ "auto_map": {
27
+ "AutoConfig": "configuration_codify.CodifyConfig"
28
+ },
29
  "backcheck_pw": "none",
30
  "backcheck_sa": "none",
31
  "bos_token_id": 1,
configuration_codify.py ADDED
@@ -0,0 +1,152 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from collections import OrderedDict
2
+ from typing import TYPE_CHECKING, Any, List, Mapping, Optional
3
+
4
+ from packaging import version
5
+
6
+ from transformers import is_torch_available
7
+
8
+ if TYPE_CHECKING:
9
+ from transformers import PreTrainedTokenizer, TensorType
10
+
11
+ from transformers.configuration_utils import PretrainedConfig
12
+ from transformers.onnx import OnnxConfigWithPast, PatchingSpec
13
+ from transformers.utils import logging
14
+
15
+ logger = logging.get_logger(__name__)
16
+
17
# Locations of the published Codify configuration files, keyed by Hub
# repository id. The URLs go through the Hub's `/resolve/` route, which serves
# the raw file; the `/blob/` route returns the HTML file viewer, not JSON.
CODIFY_PRETRAINED_CONFIG_ARCHIVE_MAP = {
    "smallcloudai/codify_medium_multi": "https://huggingface.co/smallcloudai/codify_medium_multi/resolve/main/config.json",
    "smallcloudai/codify_3b_multi": "https://huggingface.co/smallcloudai/codify_3b_multi/resolve/main/config.json",
}
21
+
22
+
23
class CodifyConfig(PretrainedConfig):
    """Configuration class for models whose ``model_type`` is ``"codify"``.

    The constructor stores the arguments listed below on the instance and hands
    every remaining keyword argument to ``PretrainedConfig.__init__``.

    ``attribute_map`` aliases the standard names ``num_hidden_layers``,
    ``num_attention_heads`` and ``hidden_size`` to ``L``, ``attn_heads`` and
    ``E``. This class never assigns those three itself.
    NOTE(review): presumably they arrive through ``**kwargs`` (e.g. from
    ``config.json``) -- confirm.

    Args:
        vocab_size: Stored as ``self.vocab_size``.
        layer_norm_epsilon: Stored as ``self.layer_norm_epsilon``.
        initializer_range: Stored as ``self.initializer_range``.
        use_cache: Stored as ``self.use_cache``.
        bos_token_id: Stored on the instance and forwarded to the base class.
        eos_token_id: Stored on the instance and forwarded to the base class.
        mlp_mult: Stored as ``self.mlp_mult``.
        tie_word_embeddings: Forwarded to the base class.
        **kwargs: Forwarded unchanged to ``PretrainedConfig.__init__``.
    """

    model_type = "codify"
    keys_to_ignore_at_inference = ["past_key_values"]
    attribute_map = {
        "num_hidden_layers": "L",
        "num_attention_heads": "attn_heads",
        "hidden_size": "E",
    }

    def __init__(
        self,
        vocab_size=51305,
        layer_norm_epsilon=1e-5,
        initializer_range=0.02,
        use_cache=True,
        bos_token_id=1,
        eos_token_id=2,
        mlp_mult=4,
        tie_word_embeddings=False,
        **kwargs,
    ):
        # Special-token ids are set here and again by the base constructor below.
        self.bos_token_id = bos_token_id
        self.eos_token_id = eos_token_id

        # Model-specific hyper-parameters.
        self.vocab_size = vocab_size
        self.mlp_mult = mlp_mult
        self.layer_norm_epsilon = layer_norm_epsilon
        self.initializer_range = initializer_range
        self.use_cache = use_cache

        super().__init__(
            bos_token_id=bos_token_id,
            eos_token_id=eos_token_id,
            tie_word_embeddings=tie_word_embeddings,
            **kwargs,
        )
55
+
56
+
57
class CodifyOnnxConfig(OnnxConfigWithPast):
    """ONNX export description for Codify models, optionally with past key/values.

    Declares the dynamic axes of the exported inputs, exposes the layer and
    attention-head counts of the wrapped model config, and builds dummy inputs
    (including zero-filled past key/values when ``use_past`` is set).
    """

    torch_onnx_minimum_version = version.parse("1.12")

    def __init__(
        self,
        config: PretrainedConfig,
        task: str = "default",
        patching_specs: Optional[List[PatchingSpec]] = None,
        use_past: bool = False,
    ):
        """Wrap ``config`` and make sure it carries a ``pad_token_id``.

        Args:
            config: Model configuration to export.
            task: Forwarded to ``OnnxConfigWithPast.__init__``.
            patching_specs: Forwarded to ``OnnxConfigWithPast.__init__``.
            use_past: Whether past key/values are part of the exported inputs.
        """
        super().__init__(config, task=task, patching_specs=patching_specs, use_past=use_past)
        if not getattr(self._config, "pad_token_id", None):
            # A missing or falsy pad token id falls back to 0.
            # TODO: how to do that better?
            self._config.pad_token_id = 0

    @property
    def inputs(self) -> Mapping[str, Mapping[int, str]]:
        """Return the input names mapped to their dynamic-axis descriptions."""
        common_inputs = OrderedDict({"input_ids": {0: "batch", 1: "sequence"}})
        if self.use_past:
            # Carried over from the BLOOM ONNX config, which stores values on dynamic axis 2.
            # For more details see: https://github.com/huggingface/transformers/pull/18344
            # NOTE(review): confirm Codify's cache uses the same layout.
            self.fill_with_past_key_values_(common_inputs, direction="inputs", inverted_values_shape=True)
            common_inputs["attention_mask"] = {0: "batch", 1: "past_sequence + sequence"}
        else:
            common_inputs["attention_mask"] = {0: "batch", 1: "sequence"}

        return common_inputs

    @property
    def num_layers(self) -> int:
        """Return ``num_hidden_layers`` of the wrapped config."""
        return self._config.num_hidden_layers

    @property
    def num_attention_heads(self) -> int:
        """Return ``num_attention_heads`` of the wrapped config."""
        # CodifyConfig never defines `n_head`; its attribute_map aliases the
        # standard name `num_attention_heads` to `attn_heads`, so read that.
        return self._config.num_attention_heads

    @property
    def atol_for_validation(self) -> float:
        """Return the absolute tolerance used when validating the export."""
        return 1e-3

    def generate_dummy_inputs(
        self,
        tokenizer: "PreTrainedTokenizer",
        batch_size: int = -1,
        seq_length: int = -1,
        is_pair: bool = False,
        framework: Optional["TensorType"] = None,
    ) -> Mapping[str, Any]:
        """Build dummy inputs ordered as ``input_ids``, ``past_key_values``, ``attention_mask``.

        ``past_key_values`` is only present when ``use_past`` is set; in that case
        the attention mask is extended by the past length as well.

        Args:
            tokenizer: Forwarded to the parent ``generate_dummy_inputs``.
            batch_size: Forwarded to the parent ``generate_dummy_inputs``.
            seq_length: Forwarded to the parent ``generate_dummy_inputs``.
            is_pair: Forwarded to the parent ``generate_dummy_inputs``.
            framework: Forwarded to the parent ``generate_dummy_inputs``.

        Returns:
            An ordered mapping from input name to dummy value.

        Raises:
            ValueError: If ``use_past`` is set and PyTorch is not installed.
        """
        # Deliberately skips OnnxConfigWithPast's own implementation and calls
        # the next one in the MRO; the past key/values are built below instead.
        common_inputs = super(OnnxConfigWithPast, self).generate_dummy_inputs(
            tokenizer, batch_size=batch_size, seq_length=seq_length, is_pair=is_pair, framework=framework
        )

        # We need to order the inputs in the way they appear in forward().
        ordered_inputs = OrderedDict({"input_ids": common_inputs["input_ids"]})
        attention_mask = common_inputs["attention_mask"]

        if self.use_past:
            if not is_torch_available():
                raise ValueError("Cannot generate dummy past_keys inputs without PyTorch installed.")
            import torch

            batch, seqlen = common_inputs["input_ids"].shape
            # Not using the same length for past_key_values
            past_key_values_length = seqlen + 2
            heads = self.num_attention_heads
            head_dim = self._config.hidden_size // heads
            # Keys and values use transposed trailing dimensions.
            past_key_shape = (batch * heads, head_dim, past_key_values_length)
            past_value_shape = (batch * heads, past_key_values_length, head_dim)
            ordered_inputs["past_key_values"] = [
                (torch.zeros(past_key_shape), torch.zeros(past_value_shape)) for _ in range(self.num_layers)
            ]

            # The mask must also cover the past positions.
            past_mask = torch.ones(batch, past_key_values_length, dtype=attention_mask.dtype)
            attention_mask = torch.cat([attention_mask, past_mask], dim=1)

        ordered_inputs["attention_mask"] = attention_mask
        return ordered_inputs

    @property
    def default_onnx_opset(self) -> int:
        """Return the default ONNX opset used for export."""
        return 13
148
+
149
+
150
# Imported at the bottom of the module, next to its only use.
from transformers import AutoConfig

# Import-time side effect: register CodifyConfig with AutoConfig under its model_type.
AutoConfig.register(CodifyConfig.model_type, CodifyConfig)