# README.md · newsmediabias/UnBIAS-RoBERTa-NER at main

# Load model directly
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

# Both the tokenizer and the model come from the same Hub checkpoint,
# so the identifier is spelled out once and reused.
MODEL_ID = "newsmediabias/UnBIAS-Roberta-NER"

tokenizer = AutoTokenizer.from_pretrained(MODEL_ID)
model = AutoModelForTokenClassification.from_pretrained(MODEL_ID)

# Sample inputs: a small batch of sentences to run through the tagger
sentences = [
    "The corrupt politician embezzled funds.",
    "Immigrants are causing a surge in crime.",
    "The movie star is an idiot for their political views.",
    "Only a fool would believe in climate change.",
    "The new policy will destroy the economy.",
]

# Tokenize the whole batch in one call; padding=True pads every sentence to
# the longest one so the batch can be returned as PyTorch tensors.
encoding = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)

# Get model predictions. This is inference only, so the forward pass runs
# under no_grad() to avoid building an autograd graph that is never used.
with torch.no_grad():
    outputs = model(**encoding)

# Apply softmax over the last dimension of the logits to get probabilities
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Get the index of the highest-probability label for each token
predicted_labels = torch.argmax(predictions, dim=-1)

# Human-readable names for the class ids predicted by the model.
# NOTE(review): this table is hard-coded; confirm it matches
# model.config.id2label for the checkpoint in use.
label_mapping = {
    0: "O",       # No bias
    1: "B-BIAS",  # Beginning of a biased sequence
    2: "I-BIAS",  # Inside a biased sequence
}

# Turn the tensor of class ids into nested lists of label names,
# one inner list per sentence.
labels = [
    [label_mapping[class_id] for class_id in row]
    for row in predicted_labels.tolist()
]

# Keep only the labels that belong to real tokens. Special tokens (including
# the padding added for batching) are dropped. The labels remain one per
# tokenizer token, not one per whitespace-separated word.
# The special-token list is read once and stored as a set, so the
# membership test inside the loop does not rebuild it for every token.
special_tokens = set(tokenizer.all_special_tokens)

aligned_labels = []
for i, sentence_labels in enumerate(labels):
    # Recover the token strings for this sentence from its input ids
    tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'][i])
    aligned_labels.append(
        [label for token, label in zip(tokens, sentence_labels) if token not in special_tokens]
    )

# Print each sentence with its token-level labels. The loop variable is named
# so that it does not overwrite the module-level `labels` list.
for sentence, sentence_labels in zip(sentences, aligned_labels):
    print(f"Sentence: {sentence}\nLabels: {sentence_labels}\n")