vaibhavad committed on
Commit 445e7cb
1 Parent(s): f9a0043

Update modeling_mistral_encoder.py

Files changed (1)
  1. modeling_mistral_encoder.py +0 -66
modeling_mistral_encoder.py CHANGED
@@ -13,15 +13,6 @@ from .attn_mask_utils import _prepare_4d_causal_attention_mask
 
 logger = logging.get_logger(__name__)
 
-def batch_to_device(batch, target_device: device):
-    """
-    send a pytorch batch to a device (CPU/GPU)
-    """
-    for key in batch:
-        if isinstance(batch[key], Tensor):
-            batch[key] = batch[key].to(target_device)
-    return batch
-
 class ModifiedMistralAttention(MistralAttention):
 
     def __init__(self, *args, **kwargs):
@@ -218,60 +209,3 @@ class MistralEncoderModel(MistralModel):
             hidden_states=all_hidden_states,
             attentions=all_self_attns,
         )
-
-    def prepare_for_tokenization(self, text):
-
-        text = '[INST] ' + text.strip() + ' [/INST]'
-        # if self.pooling_mode == "eos_token":
-        #     text = text.strip() + ' </s>'
-        return text
-
-    def tokenize(self, texts):
-        # return self.tokenizer(texts, return_tensors='pt', padding=True, truncation=True, max_length=self.max_length)
-
-        texts_2 = []
-        original_texts = []
-        for text in texts:
-            t = text.split("!@#$%^&*()")
-            texts_2.append(t[1])
-            original_texts.append("".join(t))
-
-        original = self.tokenizer(original_texts, return_tensors='pt', padding=True, truncation=True, max_length=self.max_length)
-        embed_mask = None
-        for t_i, t in enumerate(texts_2):
-            ids = self.tokenizer([t], return_tensors='pt', padding=True, truncation=True, max_length=self.max_length, add_special_tokens=False)
-            if embed_mask is None:
-                e_m = torch.zeros_like(original["attention_mask"][t_i])
-                if len(ids["input_ids"][0]) > 0:
-                    e_m[-len(ids["input_ids"][0]):] = torch.ones(len(ids["input_ids"][0]))
-                embed_mask = e_m.unsqueeze(0)
-            else:
-                e_m = torch.zeros_like(original["attention_mask"][t_i])
-                if len(ids["input_ids"][0]) > 0:
-                    e_m[-len(ids["input_ids"][0]):] = torch.ones(len(ids["input_ids"][0]))
-                embed_mask = torch.cat((embed_mask, e_m.unsqueeze(0)), dim=0)
-
-        original["embed_mask"] = embed_mask
-        return original
-
-    def _skip_instruction(self, sentence_feature):
-        assert sentence_feature["attention_mask"].shape == sentence_feature["embed_mask"].shape
-        sentence_feature["attention_mask"] = sentence_feature["embed_mask"]
-
-    def _encode(self, sentences_batch, device, convert_to_numpy, multiprocessing=False):
-
-        if multiprocessing:
-            rank = mp.current_process()._identity[0]
-            if device is None and torch.cuda.is_available():
-                device = f"cuda:{rank % torch.cuda.device_count()}"
-
-        self.to(device)
-        features = self.tokenize([self.prepare_for_tokenization(sentence) for sentence in sentences_batch])
-        features = batch_to_device(features, device)
-
-        with torch.no_grad():
-            embeddings = self.forward(features)
-            embeddings = embeddings.detach()
-            embeddings = embeddings.cpu()
-
-        return embeddings
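Note: for reference, the sketch below reproduces, as a standalone function, the instruction-masking tokenization that this commit deletes from MistralEncoderModel.tokenize. The function name, tokenizer checkpoint, and max_length are illustrative assumptions and not part of this repository; the logic simply mirrors the removed code (split on the "!@#$%^&*()" separator, tokenize the full string, and build an embed_mask over the trailing tokens so pooling can skip the instruction), and it assumes a left-padded tokenizer so those trailing tokens sit at the end of each padded row.

import torch

SEPARATOR = "!@#$%^&*()"  # same marker the removed tokenize() split on

def tokenize_with_embed_mask(texts, tokenizer, max_length=512):
    # Split each input into (instruction, text-to-embed) on the separator and
    # tokenize the concatenation to get input_ids / attention_mask.
    tails, originals = [], []
    for text in texts:
        parts = text.split(SEPARATOR)
        tails.append(parts[1] if len(parts) > 1 else "")
        originals.append("".join(parts))

    batch = tokenizer(originals, return_tensors="pt", padding=True,
                      truncation=True, max_length=max_length)

    # Mark only the tokens of the tail (the part after the separator); with left
    # padding these occupy the last positions of each row, as in the removed code.
    embed_masks = []
    for i, tail in enumerate(tails):
        tail_ids = tokenizer([tail], return_tensors="pt", truncation=True,
                             max_length=max_length, add_special_tokens=False)
        mask = torch.zeros_like(batch["attention_mask"][i])
        n = tail_ids["input_ids"].shape[1]
        if n > 0:
            mask[-n:] = 1
        embed_masks.append(mask)

    batch["embed_mask"] = torch.stack(embed_masks, dim=0)
    return batch

# Hypothetical usage (checkpoint name is an assumption, not taken from this repo):
# from transformers import AutoTokenizer
# tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-Instruct-v0.2", padding_side="left")
# tokenizer.pad_token = tokenizer.eos_token
# batch = tokenize_with_embed_mask(["retrieve relevant passages: !@#$%^&*()what is attention?"], tokenizer)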
 