from tqdm import tqdm
import torch
from torch.nn import CrossEntropyLoss
def evaluate_model(model, tokenizer, dl):
    """Return the mean per-token cross-entropy loss for each example yielded by `dl`."""
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    #model = model.to(device)
    losses = []
    for batch in tqdm(dl):
        # Tokenize the raw text batch, padding/truncating to a fixed length.
        batch = tokenizer(batch, padding=True, return_tensors='pt', truncation=True, max_length=150)
        # Labels are a copy of input_ids with padding positions set to -100 so they are ignored by the loss.
        labels = batch['input_ids'].masked_fill(batch['attention_mask'] == 0, -100)
        batch['labels'] = labels
        batch = {k: v.to(device) for k, v in batch.items()}
        with torch.no_grad():
            outputs = model(batch['input_ids'], attention_mask=batch['attention_mask'], labels=batch['labels'])
        # Shift so that the token at position i is predicted from positions < i.
        shift_logits = outputs.logits[..., :-1, :].contiguous()
        shift_labels = batch['labels'][..., 1:].contiguous()
        # Per-token loss (no reduction), then average over the non-padded tokens of each sequence.
        loss_fct = CrossEntropyLoss(reduction='none')
        loss = loss_fct(shift_logits.transpose(1, 2), shift_labels)
        num_tokens = torch.sum(shift_labels != -100, dim=1)
        loss_sum = torch.sum(loss, dim=1)
        loss = loss_sum / num_tokens
        losses.append(loss)
    losses = torch.cat(losses)
    return losses
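

# Example usage sketch (an assumption, not part of the original file): the model name,
# texts, and batch size below are placeholders. `evaluate_model` expects `dl` to yield
# lists of raw strings, so a plain DataLoader over a list of texts is enough.
if __name__ == '__main__':
    from torch.utils.data import DataLoader
    from transformers import AutoModelForCausalLM, AutoTokenizer

    model_name = 'gpt2'  # hypothetical checkpoint; any causal LM should work
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    if tokenizer.pad_token is None:
        tokenizer.pad_token = tokenizer.eos_token  # GPT-2-style tokenizers have no pad token by default
    model = AutoModelForCausalLM.from_pretrained(model_name)
    model = model.to('cuda' if torch.cuda.is_available() else 'cpu')
    model.eval()

    texts = ['The quick brown fox jumps over the lazy dog.', 'Hello world.']  # placeholder data
    dl = DataLoader(texts, batch_size=2)  # default collation yields lists of strings

    per_example_losses = evaluate_model(model, tokenizer, dl)
    print(per_example_losses)  # one mean per-token loss per input text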