Update configuration_gpt_refact.py
Do some operations in the attention block in fp32 to preserve the model's performance.
configuration_gpt_refact.py
CHANGED
@@ -30,8 +30,8 @@ class GPTRefactConfig(PretrainedConfig):
|
|
30 |
use_cache=True,
|
31 |
bos_token_id=-1,
|
32 |
eos_token_id=0,
|
33 |
-        attention_softmax_in_fp32=False,
|
34 |
-        scale_attention_softmax_in_fp32=False,
|
35 |
resid_pdrop=0.1,
|
36 |
embd_pdrop=0.1,
|
37 |
attn_pdrop=0.1,
|
|
|
30 |
use_cache=True,
|
31 |
bos_token_id=-1,
|
32 |
eos_token_id=0,
|
33 |
+        attention_softmax_in_fp32=True,
|
34 |
+        scale_attention_softmax_in_fp32=True,
|
35 |
resid_pdrop=0.1,
|
36 |
embd_pdrop=0.1,
|
37 |
attn_pdrop=0.1,
|