mohres committed on
Commit
ecbd3b0
·
verified ·
1 Parent(s): 5539216

Update README.md

Browse files
Files changed (1) hide show
  1. README.md +68 -0
README.md CHANGED
@@ -51,6 +51,74 @@ More information needed
51
 
52
  ## Training procedure
53
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
54
 
55
  ### Training hyperparameters
56
 
 
51
 
52
  ## Training procedure
53
 
54
+ ```
55
+ import torch
56
+ from datasets import load_dataset
57
+ from transformers import (
58
+ AutoModelForSequenceClassification,
59
+ AutoTokenizer,
60
+ DataCollatorWithPadding,
61
+ TrainingArguments,
62
+ Trainer
63
+ )
64
+
65
+
66
+ device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
67
+
68
+ labr = load_dataset("labr")
69
+
70
+ labels = {0,1,2,3,4}
71
+ target_names = [
72
+ "Poor",
73
+ "Fair",
74
+ "Good",
75
+ "Very Good",
76
+ "Excellent"
77
+ ]
78
+
79
+ id2label = {idx: label for idx, label in enumerate(target_names)}
80
+ label2id = {label: idx for idx, label in enumerate(target_names)}
81
+
82
+
83
+ BERT_MODEL = "google-bert/bert-base-multilingual-uncased"
84
+
85
+ model = AutoModelForSequenceClassification.from_pretrained(BERT_MODEL, num_labels = len(id2label))
86
+ tokenizer = AutoTokenizer.from_pretrained(BERT_MODEL)
87
+
88
+ model.to(device)
89
+
90
+
91
+ def preprocess_function(examples):
92
+ return tokenizer(examples["text"], truncation=True)
93
+
94
+ tokenized_labr = labr.map(preprocess_function, batched=True)
95
+
96
+
97
+ data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
98
+
99
+ training_args = TrainingArguments(
100
+ output_dir="Arabic-Book-Review-Sentiment-Assessment",
101
+ learning_rate=2e-5,
102
+ per_device_train_batch_size=8,
103
+ per_device_eval_batch_size=8,
104
+ num_train_epochs=1,
105
+ weight_decay=0.01,
106
+ push_to_hub=True
107
+ )
108
+
109
+ trainer = Trainer(
110
+ model=model,
111
+ args=training_args,
112
+ train_dataset=tokenized_labr["train"],
113
+ eval_dataset=tokenized_labr["test"],
114
+ tokenizer=tokenizer,
115
+ data_collator=data_collator,
116
+ )
117
+
118
+ trainer.train()
119
+
120
+ trainer.evaluate(tokenized_labr["test"])
121
+ ```
122
 
123
  ### Training hyperparameters
124