Update README.md

adding more information to README

README.md CHANGED
<!-- Provide the basic links for the model. -->

- **Repository:** https://huggingface.co/snoop088/imdb_tuned-bloom1b1-sentiment-classifier/tree/main
- **Paper [optional]:** [More Information Needed]
- **Demo [optional]:** [More Information Needed]
The model is intended to be used for Text Classification.

### Direct Use

An example script for using the model. Please note that this is a PEFT adapter on the BLOOM-1b1 base model:

```
import numpy as np
import pandas as pd
import torch
from transformers import AutoModelForSequenceClassification, AutoTokenizer

DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"

model_name = 'snoop088/imdb_tuned-bloom1b1-sentiment-classifier'
loaded_model = AutoModelForSequenceClassification.from_pretrained(model_name,
                                                                  trust_remote_code=True,
                                                                  num_labels=2,
                                                                  device_map="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token  # BLOOM defines no pad token by default

my_set = pd.read_csv("./data/df_manual.csv")

inputs = tokenizer(list(my_set["review"]), truncation=True, padding="max_length", max_length=256, return_tensors="pt").to(DEVICE)
with torch.no_grad():  # inference only, no gradients needed
    outputs = loaded_model(**inputs)
outcome = np.argmax(outputs.logits.cpu().numpy(), axis=-1)
```
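For readability, the integer predictions can be mapped back to sentiment strings. A minimal sketch, assuming id 1 means "positive" as in the label encoding used during training (`id2label` here is an illustrative name, not part of the saved config):

```
# hypothetical mapping; assumes 1 = positive, matching the training preprocessing
id2label = {0: "negative", 1: "positive"}
my_set["predicted"] = [id2label[int(i)] for i in outcome]
print(my_set[["review", "predicted"]].head())
```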
[More Information Needed]
### Training Data

<!-- This should link to a Dataset Card, perhaps with a short stub of information on what the training data is all about as well as documentation related to data pre-processing or additional filtering. -->

Training was done on the IMDB dataset available on the Hub:

[imdb](https://huggingface.co/datasets/imdb)
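For reference, the dataset can be loaded directly from the Hub; a minimal sketch (note that the Hub version exposes `text`/`label` columns, while the preprocessing below reads `review`/`sentiment`, so an intermediate rename or CSV export is assumed):

```
from datasets import load_dataset

dataset = load_dataset("imdb")
print(dataset)  # DatasetDict with train / test / unsupervised splits
```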
[More Information Needed]
### Training Procedure
```
from peft import LoraConfig, get_peft_model
from transformers import TrainingArguments

training_arguments = TrainingArguments(
    output_dir="your_tuned_model_name",
    save_strategy="epoch",
    per_device_train_batch_size=4,
    per_device_eval_batch_size=4,
    gradient_accumulation_steps=4,
    optim="adamw_torch",
    evaluation_strategy="steps",
    logging_steps=5,
    learning_rate=1e-5,
    max_grad_norm=0.3,
    eval_steps=0.2,
    num_train_epochs=2,
    warmup_ratio=0.1,
    # group_by_length=True,
    fp16=False,
    weight_decay=0.001,
    lr_scheduler_type="constant",
)

peft_model = get_peft_model(model, LoraConfig(
    task_type="SEQ_CLS",
    r=16,
    lora_alpha=16,
    target_modules=[
        'query_key_value',
        'dense',
    ],
    bias="none",
    lora_dropout=0.05,  # conventional value
))
```

LoRA results in: trainable params: 3,542,016 || all params: 1,068,859,392 || trainable%: 0.3313827830405592
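The card does not show the actual `Trainer` invocation; the following is a minimal sketch of how the pieces fit together, assuming `tokenised_data` and `data_collator` from the preprocessing section below and `compute_metrics` from the evaluation section:

```
from transformers import Trainer

peft_model.print_trainable_parameters()  # prints the trainable-params line quoted above

trainer = Trainer(
    model=peft_model,
    args=training_arguments,
    train_dataset=tokenised_data["train"],
    eval_dataset=tokenised_data["test"],
    data_collator=data_collator,
    tokenizer=tokenizer,
    compute_metrics=compute_metrics,
)
trainer.train()
```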
<!-- This relates heavily to the Technical Specifications. Content here should link to that section when it is relevant to the training procedure. -->

#### Preprocessing [optional]
Simple preprocessing with a `DataCollatorWithPadding`:

```
from transformers import DataCollatorWithPadding

def process_data(example):
    # tokenise without padding; the collator pads each batch dynamically
    item = tokenizer(example["review"], truncation=True, max_length=320)
    item["labels"] = [1 if sent == 'positive' else 0 for sent in example["sentiment"]]
    return item

# `dataset` is assumed to hold the raw splits with `review`/`sentiment` columns
tokenised_data = dataset.map(process_data, batched=True)
tokenised_data = tokenised_data.remove_columns(["review", "sentiment"])
data_collator = DataCollatorWithPadding(tokenizer=tokenizer)
```
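Skipping padding at tokenisation time and letting `DataCollatorWithPadding` pad each batch to its longest member keeps short batches cheap; the `max_length=320` truncation only bounds the worst case.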
#### Training Hyperparameters
## Evaluation

<!-- This section describes the evaluation protocols and provides the results. -->

The evaluation function passed to the `Trainer`:
```
import evaluate
import numpy as np

def compute_metrics(eval_pred):
    # all metrics are predefined in the HF `evaluate` package
    precision_metric = evaluate.load("precision")
    recall_metric = evaluate.load("recall")
    f1_metric = evaluate.load("f1")
    accuracy_metric = evaluate.load("accuracy")

    # eval_pred is the tuple of predictions and labels returned by the model
    logits, labels = eval_pred
    predictions = np.argmax(logits, axis=-1)

    precision = precision_metric.compute(predictions=predictions, references=labels)["precision"]
    recall = recall_metric.compute(predictions=predictions, references=labels)["recall"]
    f1 = f1_metric.compute(predictions=predictions, references=labels)["f1"]
    accuracy = accuracy_metric.compute(predictions=predictions, references=labels)["accuracy"]

    # the Trainer expects a dictionary mapping metric names to scores
    return {"precision": precision, "recall": recall, "f1-score": f1, "accuracy": accuracy}
```
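One design note: `evaluate.load` is called inside `compute_metrics`, so the four metrics are re-loaded on every evaluation pass; hoisting the `load` calls to module level would avoid that small overhead without changing the results.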
### Testing Data, Factors & Metrics
#### Hardware

- CPU: 13th Gen Intel(R) Core(TM) i9-13900K
- GPU: Nvidia RTX 4090 / 24 GB
- Memory: 64 GB
#### Software

- python 3.11.6
- transformers 4.36.2
- torch 2.1.2
- peft 0.7.1
- numpy 1.26.2
- datasets 2.16.0

## Citation [optional]