shainaraza commited on
Commit
e53b6a6
1 Parent(s): 1d52e8b

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +50 -0
README.md ADDED
@@ -0,0 +1,50 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Example: token-level bias detection with the UnBIAS RoBERTa NER model.
# Loads the tokenizer/model from the Hugging Face Hub, classifies each token
# of a batch of sentences, and prints word-aligned BIO bias labels.

# Load model directly
import torch
from transformers import AutoTokenizer, AutoModelForTokenClassification

tokenizer = AutoTokenizer.from_pretrained("newsmediabias/UnBIAS-Roberta-NER")
model = AutoModelForTokenClassification.from_pretrained("newsmediabias/UnBIAS-Roberta-NER")

# Example batch of sentences
sentences = [
    "The corrupt politician embezzled funds.",
    "Immigrants are causing a surge in crime.",
    "The movie star is an idiot for their political views.",
    "Only a fool would believe in climate change.",
    "The new policy will destroy the economy."
]

# Tokenize the batch (padding/truncation so all sequences share one tensor)
encoding = tokenizer(sentences, return_tensors='pt', padding=True, truncation=True)

# Get model predictions; no_grad avoids building autograd graphs during inference
with torch.no_grad():
    outputs = model(**encoding)

# Apply softmax to the output logits to get per-token class probabilities
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)

# Get the highest-probability label id for each token
predicted_labels = torch.argmax(predictions, dim=-1)

# Define a mapping for the labels (BIO scheme)
label_mapping = {
    0: "O",       # No bias
    1: "B-BIAS",  # Beginning of a biased sequence
    2: "I-BIAS"   # Inside a biased sequence
}

# Convert predicted label ids to their corresponding label names
labels = [
    [label_mapping[label_id.item()] for label_id in sentence_labels]
    for sentence_labels in predicted_labels
]

# Align labels with the tokens of each sentence, dropping special tokens
# (e.g. <s>, </s>, padding). NOTE: labels remain per-subword-token; merging
# subwords back into whole words is left to the caller.
aligned_labels = []
for i, sentence_labels in enumerate(labels):
    # Recover the token strings for this sentence
    tokens = tokenizer.convert_ids_to_tokens(encoding['input_ids'][i])
    # Only keep labels for tokens that are not special tokens
    kept = [
        label
        for token, label in zip(tokens, sentence_labels)
        if token not in tokenizer.all_special_tokens
    ]
    aligned_labels.append(kept)

# Print the aligned labels for each sentence
# (loop variable renamed so it does not shadow the outer `labels` list)
for sentence, sent_labels in zip(sentences, aligned_labels):
    print(f"Sentence: {sentence}\nLabels: {sent_labels}\n")