Raghavan
/

indictrans2-en-indic-dist-200M

text2text-generation

Model card Files Files and versions Community

Raghavan committed on Jan 20

Commit

140295b

•

1 Parent(s): 02d1844

Upload 7 files

Files changed (1) hide show

modeling_indictrans.py +5 -3

modeling_indictrans.py CHANGED Viewed

@@ -61,11 +61,12 @@ def shift_tokens_right(input_ids: torch.Tensor, pad_token_id: int, decoder_start
 def prepare_decoder_input_ids_label(decoder_input_ids, decoder_attention_mask):
-    labels = decoder_input_ids[:, 1:]
     labels_mask = labels == 1
     labels[labels_mask] = -100
     mask = (decoder_input_ids == eos_token_id)
     decoder_input_ids[mask] = 1
     decoder_attention_mask[mask] = 0
@@ -75,6 +76,7 @@ def prepare_decoder_input_ids_label(decoder_input_ids, decoder_attention_mask):
     return decoder_input_ids, decoder_attention_mask, labels
 # Copied from transformers.models.bart.modeling_bart._make_causal_mask
 def _make_causal_mask(
         input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0

 def prepare_decoder_input_ids_label(decoder_input_ids, decoder_attention_mask):
     """Derive next-token labels and mask EOS positions in the decoder inputs.

     The labels are the decoder ids shifted one step to the left and keep the
     same shape as ``decoder_input_ids``; the final column, which has no
     successor token, is filled with -100.

     Args:
         decoder_input_ids: 2-D integer tensor (indexed as ``[:, 1:]``).
             Modified in place: every position equal to the module-level
             ``eos_token_id`` is overwritten with 1.
         decoder_attention_mask: tensor indexable by the same boolean mask.
             Modified in place: those same positions are set to 0.

     Returns:
         Tuple ``(decoder_input_ids, decoder_attention_mask, labels)`` where
         the first two are the (mutated) arguments and ``labels`` is a new
         tensor.
     """
     # Fresh tensor with the inputs' dtype/device so that writing into the
     # labels never aliases ``decoder_input_ids``.  (``Tensor.full_size`` does
     # not exist; ``new_full`` is the tensor-relative constructor.)
     labels = decoder_input_ids.new_full(decoder_input_ids.size(), -100)
     labels[:, :-1] = decoder_input_ids[:, 1:]

     # NOTE(review): id 1 is presumably the pad token and -100 the loss
     # ignore index -- confirm against the tokenizer / loss configuration.
     labels[labels == 1] = -100

     # The labels were copied above, so they still contain the EOS target even
     # though the EOS positions are blanked out of the inputs here.
     eos_positions = decoder_input_ids == eos_token_id
     decoder_input_ids[eos_positions] = 1
     decoder_attention_mask[eos_positions] = 0
     return decoder_input_ids, decoder_attention_mask, labels
 # Copied from transformers.models.bart.modeling_bart._make_causal_mask
 def _make_causal_mask(
         input_ids_shape: torch.Size, dtype: torch.dtype, device: torch.device, past_key_values_length: int = 0