davda54 committed
Commit 7eb7352 · verified · Parent: 7e737af

Use approximate GELU from PyTorch

Files changed (1):
  1. modeling_norbert.py +1 -3
modeling_norbert.py CHANGED
@@ -8,7 +8,6 @@ from torch.utils import checkpoint
 
 from .configuration_norbert import NorbertConfig
 from transformers.modeling_utils import PreTrainedModel
-from transformers.activations import gelu_new
 from transformers.modeling_outputs import (
     MaskedLMOutput,
     MultipleChoiceModelOutput,
@@ -17,7 +16,6 @@ from transformers.modeling_outputs import (
     TokenClassifierOutput,
     BaseModelOutput
 )
-from transformers.pytorch_utils import softmax_backward_data
 
 
 class Encoder(nn.Module):
@@ -81,7 +79,7 @@ class EncoderLayer(nn.Module):
 class GeGLU(nn.Module):
     def forward(self, x):
         x, gate = x.chunk(2, dim=-1)
-        x = x * gelu_new(gate)
+        x = x * F.gelu(gate, approximate="tanh")
         return x
 
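Note on the change: transformers.activations.gelu_new computes the tanh approximation of GELU, so swapping in PyTorch's built-in F.gelu(gate, approximate="tanh") should be numerically equivalent up to floating-point error, while dispatching to a single fused kernel instead of several Python-level tensor ops. This also assumes the file already imports torch.nn.functional as F, as is standard in transformers model files. A minimal sketch of that equivalence check (not part of the commit; gelu_new below is reproduced from its standard tanh-approximation formula):

    import math
    import torch
    import torch.nn.functional as F

    def gelu_new(x):
        # Tanh approximation of GELU, as defined in transformers.activations:
        # 0.5 * x * (1 + tanh(sqrt(2/pi) * (x + 0.044715 * x^3)))
        return 0.5 * x * (1.0 + torch.tanh(math.sqrt(2.0 / math.pi) * (x + 0.044715 * torch.pow(x, 3.0))))

    x = torch.randn(4096)
    # The two implementations should agree to within floating-point tolerance.
    assert torch.allclose(gelu_new(x), F.gelu(x, approximate="tanh"), atol=1e-6)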