davidhd committed on
Commit
e27bb0d
·
verified ·
1 Parent(s): f8e06e7

Fix attention mask dtype issue

Browse files

The attention mask in AMPLIFY should be additive. Adds an assert to check this condition.

Files changed (1) hide show
  1. amplify.py +4 -0
amplify.py CHANGED
@@ -246,6 +246,10 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
246
 
247
  # Expand and repeat: (Batch, Length) -> (Batch, Heads, Length, Length)
248
  if attention_mask is not None and not torch.all(attention_mask == 0):
 
 
 
 
249
  attention_mask = (
250
  attention_mask.unsqueeze(1)
251
  .unsqueeze(1)
 
246
 
247
  # Expand and repeat: (Batch, Length) -> (Batch, Heads, Length, Length)
248
  if attention_mask is not None and not torch.all(attention_mask == 0):
249
+ assert attention_mask.dtype != torch.bool and 1.0 not in attention_mask, (
250
+ "AMPLIFY expects an additive attention_mask.\n"
251
+ "Modify the output of the tokenizer with attention_mask = torch.where(attention_mask, float(0.0), float('-inf'))"
252
+ )
253
  attention_mask = (
254
  attention_mask.unsqueeze(1)
255
  .unsqueeze(1)