chandar-lab
/

AMPLIFY_120M_base

@@ -246,6 +246,10 @@ class AMPLIFY(AMPLIFYPreTrainedModel):
         # Expand and repeat: (Batch, Length) -> (Batch, Heads, Length, Length)
         if attention_mask is not None and not torch.all(attention_mask == 0):
             attention_mask = (
                 attention_mask.unsqueeze(1)
                 .unsqueeze(1)

         # Expand and repeat: (Batch, Length) -> (Batch, Heads, Length, Length)
         if attention_mask is not None and not torch.all(attention_mask == 0):
+            assert attention_mask.dtype != torch.bool and 1.0 not in attention_mask, (
+                "AMPLIFY expects an additive attention_mask.\n"
+                "Modify the output of the tokenizer with attention_mask = torch.where(attention_mask, float(0.0), float('-inf'))"
+            )
             attention_mask = (
                 attention_mask.unsqueeze(1)
                 .unsqueeze(1)