svakhreev committed
Commit da95b3f
1 Parent(s): 1814817

Update configuration_gpt_refact.py

Files changed (1)
  1. configuration_gpt_refact.py +20 -30
configuration_gpt_refact.py CHANGED
@@ -1,7 +1,6 @@
 from transformers.configuration_utils import PretrainedConfig
 from transformers.utils import logging
 
-
 logger = logging.get_logger(__name__)
 
 
@@ -16,26 +15,23 @@ class GPTRefactConfig(PretrainedConfig):
     }
 
     def __init__(
-        self,
-        vocab_size: int = 49216,
-        n_positions: int = 4096,
-        n_embd: int = 1024,
-        n_layer: int = 32,
-        n_head: int = 64,
-        max_position_embeddings: int = 4096,
-        multi_query: bool = True,
-        layer_norm_epsilon=1e-5,
-        initializer_range=0.02,
-        scale_attn_weights=True,
-        use_cache=True,
-        bos_token_id=-1,
-        eos_token_id=0,
-        attention_softmax_in_fp32=False,
-        scale_attention_softmax_in_fp32=False,
-        resid_pdrop=0.1,
-        embd_pdrop=0.1,
-        attn_pdrop=0.1,
-        **kwargs,
+        self,
+        vocab_size: int = 49216,
+        n_positions: int = 4096,
+        n_embd: int = 1024,
+        n_layer: int = 32,
+        n_head: int = 64,
+        max_position_embeddings: int = 4096,
+        multi_query: bool = True,
+        layer_norm_epsilon: float = 1e-5,
+        initializer_range: float = 0.02,
+        use_cache: bool = True,
+        eos_token_id: int = 0,
+        attention_softmax_in_fp32: bool = True,
+        scale_attention_softmax_in_fp32: bool = True,
+        attention_bias_in_fp32: bool = True,
+        torch_dtype: str = 'bfloat16',
+        **kwargs,
     ):
         self.vocab_size = vocab_size
         self.n_positions = n_positions
@@ -43,19 +39,13 @@ class GPTRefactConfig(PretrainedConfig):
         self.n_layer = n_layer
         self.n_head = n_head
         self.n_inner = None
-        self.resid_pdrop = resid_pdrop
-        self.embd_pdrop = embd_pdrop
-        self.attn_pdrop = attn_pdrop
         self.layer_norm_epsilon = layer_norm_epsilon
         self.initializer_range = initializer_range
-        self.scale_attn_weights = scale_attn_weights
         self.use_cache = use_cache
         self.attention_softmax_in_fp32 = attention_softmax_in_fp32
         self.scale_attention_softmax_in_fp32 = scale_attention_softmax_in_fp32
-
-        self.bos_token_id = bos_token_id
-        self.eos_token_id = eos_token_id
-
+        self.attention_bias_in_fp32 = attention_bias_in_fp32
         self.multi_query = multi_query
         self.max_position_embeddings = max_position_embeddings
-        super().__init__(bos_token_id=bos_token_id, eos_token_id=eos_token_id, **kwargs)
+        self.torch_dtype = torch_dtype
+        super().__init__(eos_token_id=eos_token_id, **kwargs)
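
For reference, a minimal usage sketch of the config after this commit. The import path is an assumption for illustration (it holds when configuration_gpt_refact.py sits next to the script, e.g. inside a checkout of the model repo); in normal use the class is resolved automatically when the model is loaded with trust_remote_code=True.

# Illustrative sketch only; assumes configuration_gpt_refact.py is importable
# from the working directory (e.g. a local clone of the model repo).
from configuration_gpt_refact import GPTRefactConfig

# Defaults after this commit: fp32 attention softmax/bias are on and
# torch_dtype is recorded as 'bfloat16'; the dropout, scale_attn_weights,
# and bos_token_id arguments no longer exist.
config = GPTRefactConfig()
assert config.attention_softmax_in_fp32 is True
assert config.attention_bias_in_fp32 is True

# Overriding sizes still works, and extra kwargs flow to PretrainedConfig.
small = GPTRefactConfig(n_layer=4, n_head=8, n_embd=256)
print(small.to_json_string())

The design effect of the change is that numerics-related behaviour (fp32 softmax, fp32 attention bias, default torch_dtype) is now fixed in the config defaults, while the unused dropout and token-id knobs are dropped rather than silently ignored.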