import pandas as pd
import torch

from transformers import AutoModelForSequenceClassification, AutoTokenizer

# Load the fine-tuned classifier and its tokenizer from the local checkpoint
model = AutoModelForSequenceClassification.from_pretrained("deberta-classification-chatrag/checkpoint-6342")
tokenizer = AutoTokenizer.from_pretrained("deberta-classification-chatrag/checkpoint-6342")
model.eval()

# Example queries to classify
queries = [
    "Comment puis-je renouveler un passeport ?",
    "Combien font deux et deux ?",
    "Écris un début de lettre de recommandation pour la Dinum",
]
result = pd.DataFrame(queries, columns=["query"])

complete_probabilities = []

for text in result["query"].tolist():
    encoding = tokenizer(text, return_tensors="pt")
    encoding = {k: v.to(model.device) for k, v in encoding.items()}

    # Run inference without tracking gradients
    with torch.no_grad():
        outputs = model(**encoding)
    logits = outputs.logits

    # The checkpoint outputs a single logit, so a sigmoid turns it into a probability
    sigmoid = torch.nn.Sigmoid()
    probs = sigmoid(logits.squeeze().cpu())
    complete_probabilities.append(probs.item())

result["prob"] = complete_probabilities
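
# (Optional sketch, assumption) If a hard yes/no label is needed rather than a raw
# probability, a decision threshold can be applied; the 0.5 cut-off and the "pred"
# column name are illustrative choices, not values taken from the training setup.
result["pred"] = (result["prob"] >= 0.5).astype(int)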

print(result)