svakhreev committed
Commit 38b78c2 · 1 Parent(s): 18aa8af

Update configuration_gpt_refact.py

Perform some operations in the attention block in fp32 to preserve the model's performance.

Files changed (1)
  1. configuration_gpt_refact.py +2 -2
configuration_gpt_refact.py CHANGED
@@ -30,8 +30,8 @@ class GPTRefactConfig(PretrainedConfig):
         use_cache=True,
         bos_token_id=-1,
         eos_token_id=0,
-        attention_softmax_in_fp32=False,
-        scale_attention_softmax_in_fp32=False,
+        attention_softmax_in_fp32=True,
+        scale_attention_softmax_in_fp32=True,
         resid_pdrop=0.1,
         embd_pdrop=0.1,
         attn_pdrop=0.1,
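
For context, here is a minimal sketch of how a flag like attention_softmax_in_fp32 is typically honored inside an attention block. The helper below is hypothetical and is not the actual modeling_gpt_refact.py code; it only illustrates why upcasting helps when the model runs in fp16 or bf16.

import torch

def attention_softmax(scores: torch.Tensor, softmax_in_fp32: bool = True) -> torch.Tensor:
    # Hypothetical helper: upcast the attention scores to fp32 before the
    # softmax so the reduction does not lose precision in half precision.
    input_dtype = scores.dtype
    if softmax_in_fp32 and input_dtype in (torch.float16, torch.bfloat16):
        scores = scores.float()
    probs = torch.nn.functional.softmax(scores, dim=-1)
    # Cast back so downstream matmuls stay in the model's compute dtype.
    return probs.to(input_dtype)

With the new defaults, configs that do not pass these arguments explicitly will now compute the attention softmax in fp32 (and, per scale_attention_softmax_in_fp32, keep the associated scaling in fp32 as well); set both flags to False to restore the previous behavior.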