diogopaes10 committed on
Commit
8e6bfcc
·
1 Parent(s): 7f7aba6

End of training

Browse files
checkpoint-320/pytorch_model.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:545aa19eb4410fc3feed598b75c981bfdcbb6c952229824c6bd1b04e161f9f84
3
- size 737788917
 
 
 
 
checkpoint-320/rng_state.pth DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:6b3143a2798a49b9dcf15a141512c86b6add165baf4f859e660e7985e9d40e60
3
- size 14575
 
 
 
 
checkpoint-320/scheduler.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:29ea6a10a633bcfa1c650a32be0ea72bf6870f43bc564bbaa5c04ac08767fab3
3
- size 627
 
 
 
 
checkpoint-320/trainer_state.json DELETED
@@ -1,334 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 32.0,
5
- "global_step": 320,
6
- "is_hyper_param_search": false,
7
- "is_local_process_zero": true,
8
- "is_world_process_zero": true,
9
- "log_history": [
10
- {
11
- "epoch": 0.1,
12
- "learning_rate": 1.9960000000000002e-05,
13
- "loss": 2.314,
14
- "step": 1
15
- },
16
- {
17
- "epoch": 2.5,
18
- "learning_rate": 1.9e-05,
19
- "loss": 2.311,
20
- "step": 25
21
- },
22
- {
23
- "epoch": 2.5,
24
- "eval_accuracy": 0.15,
25
- "eval_disk_space_total": 78.1898422241211,
26
- "eval_disk_space_used": 36.27968215942383,
27
- "eval_f1": 0.08571428571428572,
28
- "eval_gpu_ram_allocated": 4.843059062957764,
29
- "eval_gpu_ram_cached": 7.046875,
30
- "eval_gpu_ram_total": 39.56402587890625,
31
- "eval_gpu_utilization": 40,
32
- "eval_loss": 2.3033015727996826,
33
- "eval_precision": 0.11052631578947367,
34
- "eval_recall": 0.15,
35
- "eval_runtime": 0.2186,
36
- "eval_samples_per_second": 91.494,
37
- "eval_steps_per_second": 13.724,
38
- "eval_system_ram_total": 83.48074722290039,
39
- "eval_system_ram_used": 5.000797271728516,
40
- "step": 25
41
- },
42
- {
43
- "epoch": 5.0,
44
- "learning_rate": 1.8e-05,
45
- "loss": 2.2703,
46
- "step": 50
47
- },
48
- {
49
- "epoch": 5.0,
50
- "eval_accuracy": 0.2,
51
- "eval_disk_space_total": 78.1898422241211,
52
- "eval_disk_space_used": 36.279693603515625,
53
- "eval_f1": 0.06857142857142857,
54
- "eval_gpu_ram_allocated": 4.843057632446289,
55
- "eval_gpu_ram_cached": 7.046875,
56
- "eval_gpu_ram_total": 39.56402587890625,
57
- "eval_gpu_utilization": 38,
58
- "eval_loss": 2.3011314868927,
59
- "eval_precision": 0.041666666666666664,
60
- "eval_recall": 0.2,
61
- "eval_runtime": 0.2293,
62
- "eval_samples_per_second": 87.241,
63
- "eval_steps_per_second": 13.086,
64
- "eval_system_ram_total": 83.48074722290039,
65
- "eval_system_ram_used": 5.012126922607422,
66
- "step": 50
67
- },
68
- {
69
- "epoch": 7.5,
70
- "learning_rate": 1.704e-05,
71
- "loss": 2.0062,
72
- "step": 75
73
- },
74
- {
75
- "epoch": 7.5,
76
- "eval_accuracy": 0.15,
77
- "eval_disk_space_total": 78.1898422241211,
78
- "eval_disk_space_used": 36.27973175048828,
79
- "eval_f1": 0.07936507936507937,
80
- "eval_gpu_ram_allocated": 4.843067646026611,
81
- "eval_gpu_ram_cached": 7.046875,
82
- "eval_gpu_ram_total": 39.56402587890625,
83
- "eval_gpu_utilization": 42,
84
- "eval_loss": 2.2817370891571045,
85
- "eval_precision": 0.05428571428571429,
86
- "eval_recall": 0.15,
87
- "eval_runtime": 0.208,
88
- "eval_samples_per_second": 96.173,
89
- "eval_steps_per_second": 14.426,
90
- "eval_system_ram_total": 83.48074722290039,
91
- "eval_system_ram_used": 4.985599517822266,
92
- "step": 75
93
- },
94
- {
95
- "epoch": 10.0,
96
- "learning_rate": 1.6040000000000002e-05,
97
- "loss": 1.49,
98
- "step": 100
99
- },
100
- {
101
- "epoch": 10.0,
102
- "eval_accuracy": 0.2,
103
- "eval_disk_space_total": 78.1898422241211,
104
- "eval_disk_space_used": 36.27973556518555,
105
- "eval_f1": 0.11777777777777779,
106
- "eval_gpu_ram_allocated": 4.843059062957764,
107
- "eval_gpu_ram_cached": 7.046875,
108
- "eval_gpu_ram_total": 39.56402587890625,
109
- "eval_gpu_utilization": 37,
110
- "eval_loss": 2.3280632495880127,
111
- "eval_precision": 0.0869047619047619,
112
- "eval_recall": 0.2,
113
- "eval_runtime": 0.208,
114
- "eval_samples_per_second": 96.137,
115
- "eval_steps_per_second": 14.421,
116
- "eval_system_ram_total": 83.48074722290039,
117
- "eval_system_ram_used": 4.982398986816406,
118
- "step": 100
119
- },
120
- {
121
- "epoch": 12.5,
122
- "learning_rate": 1.5040000000000002e-05,
123
- "loss": 0.9424,
124
- "step": 125
125
- },
126
- {
127
- "epoch": 12.5,
128
- "eval_accuracy": 0.25,
129
- "eval_disk_space_total": 78.1898422241211,
130
- "eval_disk_space_used": 36.27980041503906,
131
- "eval_f1": 0.17333333333333334,
132
- "eval_gpu_ram_allocated": 4.843059062957764,
133
- "eval_gpu_ram_cached": 7.046875,
134
- "eval_gpu_ram_total": 39.56402587890625,
135
- "eval_gpu_utilization": 42,
136
- "eval_loss": 2.3475446701049805,
137
- "eval_precision": 0.14166666666666666,
138
- "eval_recall": 0.25,
139
- "eval_runtime": 0.2046,
140
- "eval_samples_per_second": 97.736,
141
- "eval_steps_per_second": 14.66,
142
- "eval_system_ram_total": 83.48074722290039,
143
- "eval_system_ram_used": 4.944629669189453,
144
- "step": 125
145
- },
146
- {
147
- "epoch": 15.0,
148
- "learning_rate": 1.4040000000000001e-05,
149
- "loss": 0.5591,
150
- "step": 150
151
- },
152
- {
153
- "epoch": 15.0,
154
- "eval_accuracy": 0.25,
155
- "eval_disk_space_total": 78.1898422241211,
156
- "eval_disk_space_used": 36.27981948852539,
157
- "eval_f1": 0.17444444444444446,
158
- "eval_gpu_ram_allocated": 4.843059062957764,
159
- "eval_gpu_ram_cached": 7.046875,
160
- "eval_gpu_ram_total": 39.56402587890625,
161
- "eval_gpu_utilization": 29,
162
- "eval_loss": 2.4503185749053955,
163
- "eval_precision": 0.14523809523809522,
164
- "eval_recall": 0.25,
165
- "eval_runtime": 0.2141,
166
- "eval_samples_per_second": 93.429,
167
- "eval_steps_per_second": 14.014,
168
- "eval_system_ram_total": 83.48074722290039,
169
- "eval_system_ram_used": 4.920143127441406,
170
- "step": 150
171
- },
172
- {
173
- "epoch": 17.5,
174
- "learning_rate": 1.3080000000000002e-05,
175
- "loss": 0.2893,
176
- "step": 175
177
- },
178
- {
179
- "epoch": 17.5,
180
- "eval_accuracy": 0.25,
181
- "eval_disk_space_total": 78.1898422241211,
182
- "eval_disk_space_used": 36.27982711791992,
183
- "eval_f1": 0.17444444444444446,
184
- "eval_gpu_ram_allocated": 4.843067646026611,
185
- "eval_gpu_ram_cached": 7.046875,
186
- "eval_gpu_ram_total": 39.56402587890625,
187
- "eval_gpu_utilization": 43,
188
- "eval_loss": 2.555687665939331,
189
- "eval_precision": 0.14523809523809522,
190
- "eval_recall": 0.25,
191
- "eval_runtime": 0.2265,
192
- "eval_samples_per_second": 88.288,
193
- "eval_steps_per_second": 13.243,
194
- "eval_system_ram_total": 83.48074722290039,
195
- "eval_system_ram_used": 4.961811065673828,
196
- "step": 175
197
- },
198
- {
199
- "epoch": 20.0,
200
- "learning_rate": 1.2080000000000001e-05,
201
- "loss": 0.1623,
202
- "step": 200
203
- },
204
- {
205
- "epoch": 20.0,
206
- "eval_accuracy": 0.3,
207
- "eval_disk_space_total": 78.1898422241211,
208
- "eval_disk_space_used": 36.279876708984375,
209
- "eval_f1": 0.2411111111111111,
210
- "eval_gpu_ram_allocated": 4.843057632446289,
211
- "eval_gpu_ram_cached": 7.046875,
212
- "eval_gpu_ram_total": 39.56402587890625,
213
- "eval_gpu_utilization": 30,
214
- "eval_loss": 2.621793270111084,
215
- "eval_precision": 0.2452380952380952,
216
- "eval_recall": 0.3,
217
- "eval_runtime": 0.217,
218
- "eval_samples_per_second": 92.146,
219
- "eval_steps_per_second": 13.822,
220
- "eval_system_ram_total": 83.48074722290039,
221
- "eval_system_ram_used": 4.910972595214844,
222
- "step": 200
223
- },
224
- {
225
- "epoch": 22.5,
226
- "learning_rate": 1.1080000000000002e-05,
227
- "loss": 0.0817,
228
- "step": 225
229
- },
230
- {
231
- "epoch": 22.5,
232
- "eval_accuracy": 0.3,
233
- "eval_disk_space_total": 78.1898422241211,
234
- "eval_disk_space_used": 36.279930114746094,
235
- "eval_f1": 0.24,
236
- "eval_gpu_ram_allocated": 4.843059062957764,
237
- "eval_gpu_ram_cached": 7.046875,
238
- "eval_gpu_ram_total": 39.56402587890625,
239
- "eval_gpu_utilization": 38,
240
- "eval_loss": 2.734551191329956,
241
- "eval_precision": 0.24166666666666664,
242
- "eval_recall": 0.3,
243
- "eval_runtime": 0.2092,
244
- "eval_samples_per_second": 95.586,
245
- "eval_steps_per_second": 14.338,
246
- "eval_system_ram_total": 83.48074722290039,
247
- "eval_system_ram_used": 4.941272735595703,
248
- "step": 225
249
- },
250
- {
251
- "epoch": 25.0,
252
- "learning_rate": 1.008e-05,
253
- "loss": 0.0475,
254
- "step": 250
255
- },
256
- {
257
- "epoch": 25.0,
258
- "eval_accuracy": 0.3,
259
- "eval_disk_space_total": 78.1898422241211,
260
- "eval_disk_space_used": 36.27995300292969,
261
- "eval_f1": 0.23444444444444446,
262
- "eval_gpu_ram_allocated": 4.843059062957764,
263
- "eval_gpu_ram_cached": 7.046875,
264
- "eval_gpu_ram_total": 39.56402587890625,
265
- "eval_gpu_utilization": 40,
266
- "eval_loss": 2.9325406551361084,
267
- "eval_precision": 0.23690476190476187,
268
- "eval_recall": 0.3,
269
- "eval_runtime": 0.2145,
270
- "eval_samples_per_second": 93.259,
271
- "eval_steps_per_second": 13.989,
272
- "eval_system_ram_total": 83.48074722290039,
273
- "eval_system_ram_used": 4.931392669677734,
274
- "step": 250
275
- },
276
- {
277
- "epoch": 27.5,
278
- "learning_rate": 9.080000000000001e-06,
279
- "loss": 0.0322,
280
- "step": 275
281
- },
282
- {
283
- "epoch": 27.5,
284
- "eval_accuracy": 0.3,
285
- "eval_disk_space_total": 78.1898422241211,
286
- "eval_disk_space_used": 36.28001403808594,
287
- "eval_f1": 0.2511111111111112,
288
- "eval_gpu_ram_allocated": 4.843059062957764,
289
- "eval_gpu_ram_cached": 7.046875,
290
- "eval_gpu_ram_total": 39.56402587890625,
291
- "eval_gpu_utilization": 42,
292
- "eval_loss": 3.123502254486084,
293
- "eval_precision": 0.2869047619047619,
294
- "eval_recall": 0.3,
295
- "eval_runtime": 0.2104,
296
- "eval_samples_per_second": 95.043,
297
- "eval_steps_per_second": 14.256,
298
- "eval_system_ram_total": 83.48074722290039,
299
- "eval_system_ram_used": 4.933628082275391,
300
- "step": 275
301
- },
302
- {
303
- "epoch": 30.0,
304
- "learning_rate": 8.08e-06,
305
- "loss": 0.0254,
306
- "step": 300
307
- },
308
- {
309
- "epoch": 30.0,
310
- "eval_accuracy": 0.3,
311
- "eval_disk_space_total": 78.1898422241211,
312
- "eval_disk_space_used": 36.280025482177734,
313
- "eval_f1": 0.23444444444444446,
314
- "eval_gpu_ram_allocated": 4.843059062957764,
315
- "eval_gpu_ram_cached": 7.046875,
316
- "eval_gpu_ram_total": 39.56402587890625,
317
- "eval_gpu_utilization": 30,
318
- "eval_loss": 3.145473003387451,
319
- "eval_precision": 0.23690476190476187,
320
- "eval_recall": 0.3,
321
- "eval_runtime": 0.2137,
322
- "eval_samples_per_second": 93.578,
323
- "eval_steps_per_second": 14.037,
324
- "eval_system_ram_total": 83.48074722290039,
325
- "eval_system_ram_used": 4.938743591308594,
326
- "step": 300
327
- }
328
- ],
329
- "max_steps": 500,
330
- "num_train_epochs": 50,
331
- "total_flos": 107663196007104.0,
332
- "trial_name": null,
333
- "trial_params": null
334
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-330/added_tokens.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "[MASK]": 128000
3
- }
 
 
 
 
checkpoint-330/config.json DELETED
@@ -1,59 +0,0 @@
1
- {
2
- "_name_or_path": "microsoft/deberta-v3-base",
3
- "architectures": [
4
- "DebertaV2ForSequenceClassification"
5
- ],
6
- "attention_probs_dropout_prob": 0.1,
7
- "hidden_act": "gelu",
8
- "hidden_dropout_prob": 0.1,
9
- "hidden_size": 768,
10
- "id2label": {
11
- "0": "Society & Culture",
12
- "1": "Science & Mathematics",
13
- "2": "Health",
14
- "3": "Education & Reference",
15
- "4": "Computers & Internet",
16
- "5": "Sports",
17
- "6": "Business & Finance",
18
- "7": "Entertainment & Music",
19
- "8": "Family & Relationships",
20
- "9": "Politics & Government"
21
- },
22
- "initializer_range": 0.02,
23
- "intermediate_size": 3072,
24
- "label2id": {
25
- "Business & Finance": 6,
26
- "Computers & Internet": 4,
27
- "Education & Reference": 3,
28
- "Entertainment & Music": 7,
29
- "Family & Relationships": 8,
30
- "Health": 2,
31
- "Politics & Government": 9,
32
- "Science & Mathematics": 1,
33
- "Society & Culture": 0,
34
- "Sports": 5
35
- },
36
- "layer_norm_eps": 1e-07,
37
- "max_position_embeddings": 512,
38
- "max_relative_positions": -1,
39
- "model_type": "deberta-v2",
40
- "norm_rel_ebd": "layer_norm",
41
- "num_attention_heads": 12,
42
- "num_hidden_layers": 12,
43
- "pad_token_id": 0,
44
- "pooler_dropout": 0,
45
- "pooler_hidden_act": "gelu",
46
- "pooler_hidden_size": 768,
47
- "pos_att_type": [
48
- "p2c",
49
- "c2p"
50
- ],
51
- "position_biased_input": false,
52
- "position_buckets": 256,
53
- "relative_attention": true,
54
- "share_att_key": true,
55
- "torch_dtype": "float32",
56
- "transformers_version": "4.31.0",
57
- "type_vocab_size": 0,
58
- "vocab_size": 128100
59
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-330/optimizer.pt DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed024c714c4752b95f5020e1ffd2f59940ea99f2bac5826eb9ebfc11a6402ee0
3
- size 1475557125
 
 
 
 
checkpoint-330/special_tokens_map.json DELETED
@@ -1,9 +0,0 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "cls_token": "[CLS]",
4
- "eos_token": "[SEP]",
5
- "mask_token": "[MASK]",
6
- "pad_token": "[PAD]",
7
- "sep_token": "[SEP]",
8
- "unk_token": "[UNK]"
9
- }
 
 
 
 
 
 
 
 
 
 
checkpoint-330/spm.model DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:c679fbf93643d19aab7ee10c0b99e460bdbc02fedf34b92b05af343b4af586fd
3
- size 2464616
 
 
 
 
checkpoint-330/tokenizer.json DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-330/tokenizer_config.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "bos_token": "[CLS]",
3
- "clean_up_tokenization_spaces": true,
4
- "cls_token": "[CLS]",
5
- "do_lower_case": false,
6
- "eos_token": "[SEP]",
7
- "mask_token": "[MASK]",
8
- "model_max_length": 1000000000000000019884624838656,
9
- "pad_token": "[PAD]",
10
- "sep_token": "[SEP]",
11
- "sp_model_kwargs": {},
12
- "split_by_punct": false,
13
- "tokenizer_class": "DebertaV2Tokenizer",
14
- "unk_token": "[UNK]",
15
- "vocab_type": "spm"
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
{checkpoint-320 → checkpoint-500}/added_tokens.json RENAMED
File without changes
{checkpoint-320 → checkpoint-500}/config.json RENAMED
File without changes
{checkpoint-320 → checkpoint-500}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:10d02ebf27d06e75305b818f564bc0d6388b260a305ac331618c2ddf0c12353a
3
  size 1475557125
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:662b6c43d90b5b129e2b7b7713353e9f9889d3127c26a7f36cb0f988814a9a23
3
  size 1475557125
{checkpoint-330 → checkpoint-500}/pytorch_model.bin RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd18743bdfde8be92fbd991f5cad7d7c0cd7aaca0583e919148be21d96ce8668
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70ba09d349c8c8ac319ad8ae7821294c459961bd17df3092a1a44adff6bd6a8c
3
  size 737788917
{checkpoint-330 → checkpoint-500}/rng_state.pth RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:172ad3783b50b89f49d659f4f234a7bb8ccb479b05073e534df5e40060af3c37
3
  size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c2d91aed1234e662896d21f942670a632f57a0e62c4b4b06ccfd49ae3369c96c
3
  size 14575
{checkpoint-330 → checkpoint-500}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dfeff0174eed12f41b793614fed3bdc8efce764c415e04518a3297ac70a962f8
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4eb82f131c89d4efc13f66a2d38e7dfbfa48833549604228e3e4ad7e42ffe0c
3
  size 627
{checkpoint-320 → checkpoint-500}/special_tokens_map.json RENAMED
File without changes
{checkpoint-320 → checkpoint-500}/spm.model RENAMED
File without changes
{checkpoint-320 → checkpoint-500}/tokenizer.json RENAMED
File without changes
{checkpoint-320 → checkpoint-500}/tokenizer_config.json RENAMED
File without changes
{checkpoint-330 → checkpoint-500}/trainer_state.json RENAMED
@@ -1,8 +1,8 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 33.0,
5
- "global_step": 330,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -350,11 +350,193 @@
350
  "eval_system_ram_total": 83.48074722290039,
351
  "eval_system_ram_used": 4.919792175292969,
352
  "step": 325
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
353
  }
354
  ],
355
  "max_steps": 500,
356
  "num_train_epochs": 50,
357
- "total_flos": 111026386045056.0,
358
  "trial_name": null,
359
  "trial_params": null
360
  }
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 50.0,
5
+ "global_step": 500,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
350
  "eval_system_ram_total": 83.48074722290039,
351
  "eval_system_ram_used": 4.919792175292969,
352
  "step": 325
353
+ },
354
+ {
355
+ "epoch": 35.0,
356
+ "learning_rate": 6.08e-06,
357
+ "loss": 0.0163,
358
+ "step": 350
359
+ },
360
+ {
361
+ "epoch": 35.0,
362
+ "eval_accuracy": 0.3,
363
+ "eval_disk_space_total": 78.1898422241211,
364
+ "eval_disk_space_used": 40.403133392333984,
365
+ "eval_f1": 0.23444444444444446,
366
+ "eval_gpu_ram_allocated": 4.8430633544921875,
367
+ "eval_gpu_ram_cached": 7.046875,
368
+ "eval_gpu_ram_total": 39.56402587890625,
369
+ "eval_gpu_utilization": 42,
370
+ "eval_loss": 3.3280787467956543,
371
+ "eval_precision": 0.2369047619047619,
372
+ "eval_recall": 0.3,
373
+ "eval_runtime": 0.2124,
374
+ "eval_samples_per_second": 94.164,
375
+ "eval_steps_per_second": 14.125,
376
+ "eval_system_ram_total": 83.48074722290039,
377
+ "eval_system_ram_used": 4.970893859863281,
378
+ "step": 350
379
+ },
380
+ {
381
+ "epoch": 37.5,
382
+ "learning_rate": 5.0800000000000005e-06,
383
+ "loss": 0.015,
384
+ "step": 375
385
+ },
386
+ {
387
+ "epoch": 37.5,
388
+ "eval_accuracy": 0.3,
389
+ "eval_disk_space_total": 78.1898422241211,
390
+ "eval_disk_space_used": 40.40315246582031,
391
+ "eval_f1": 0.23444444444444446,
392
+ "eval_gpu_ram_allocated": 4.843059062957764,
393
+ "eval_gpu_ram_cached": 7.046875,
394
+ "eval_gpu_ram_total": 39.56402587890625,
395
+ "eval_gpu_utilization": 41,
396
+ "eval_loss": 3.331848621368408,
397
+ "eval_precision": 0.23690476190476187,
398
+ "eval_recall": 0.3,
399
+ "eval_runtime": 0.2059,
400
+ "eval_samples_per_second": 97.115,
401
+ "eval_steps_per_second": 14.567,
402
+ "eval_system_ram_total": 83.48074722290039,
403
+ "eval_system_ram_used": 4.9642181396484375,
404
+ "step": 375
405
+ },
406
+ {
407
+ "epoch": 40.0,
408
+ "learning_rate": 4.08e-06,
409
+ "loss": 0.0133,
410
+ "step": 400
411
+ },
412
+ {
413
+ "epoch": 40.0,
414
+ "eval_accuracy": 0.3,
415
+ "eval_disk_space_total": 78.1898422241211,
416
+ "eval_disk_space_used": 40.40321350097656,
417
+ "eval_f1": 0.2511111111111112,
418
+ "eval_gpu_ram_allocated": 4.843057632446289,
419
+ "eval_gpu_ram_cached": 7.046875,
420
+ "eval_gpu_ram_total": 39.56402587890625,
421
+ "eval_gpu_utilization": 37,
422
+ "eval_loss": 3.361743927001953,
423
+ "eval_precision": 0.2869047619047619,
424
+ "eval_recall": 0.3,
425
+ "eval_runtime": 0.2122,
426
+ "eval_samples_per_second": 94.248,
427
+ "eval_steps_per_second": 14.137,
428
+ "eval_system_ram_total": 83.48074722290039,
429
+ "eval_system_ram_used": 4.96075439453125,
430
+ "step": 400
431
+ },
432
+ {
433
+ "epoch": 42.5,
434
+ "learning_rate": 3.08e-06,
435
+ "loss": 0.0127,
436
+ "step": 425
437
+ },
438
+ {
439
+ "epoch": 42.5,
440
+ "eval_accuracy": 0.3,
441
+ "eval_disk_space_total": 78.1898422241211,
442
+ "eval_disk_space_used": 40.403221130371094,
443
+ "eval_f1": 0.23444444444444446,
444
+ "eval_gpu_ram_allocated": 4.843057632446289,
445
+ "eval_gpu_ram_cached": 7.046875,
446
+ "eval_gpu_ram_total": 39.56402587890625,
447
+ "eval_gpu_utilization": 39,
448
+ "eval_loss": 3.3787875175476074,
449
+ "eval_precision": 0.23690476190476187,
450
+ "eval_recall": 0.3,
451
+ "eval_runtime": 0.2221,
452
+ "eval_samples_per_second": 90.039,
453
+ "eval_steps_per_second": 13.506,
454
+ "eval_system_ram_total": 83.48074722290039,
455
+ "eval_system_ram_used": 4.961738586425781,
456
+ "step": 425
457
+ },
458
+ {
459
+ "epoch": 45.0,
460
+ "learning_rate": 2.08e-06,
461
+ "loss": 0.0129,
462
+ "step": 450
463
+ },
464
+ {
465
+ "epoch": 45.0,
466
+ "eval_accuracy": 0.3,
467
+ "eval_disk_space_total": 78.1898422241211,
468
+ "eval_disk_space_used": 40.40324783325195,
469
+ "eval_f1": 0.2511111111111112,
470
+ "eval_gpu_ram_allocated": 4.843059062957764,
471
+ "eval_gpu_ram_cached": 7.046875,
472
+ "eval_gpu_ram_total": 39.56402587890625,
473
+ "eval_gpu_utilization": 41,
474
+ "eval_loss": 3.392756700515747,
475
+ "eval_precision": 0.2869047619047619,
476
+ "eval_recall": 0.3,
477
+ "eval_runtime": 0.2132,
478
+ "eval_samples_per_second": 93.819,
479
+ "eval_steps_per_second": 14.073,
480
+ "eval_system_ram_total": 83.48074722290039,
481
+ "eval_system_ram_used": 4.957637786865234,
482
+ "step": 450
483
+ },
484
+ {
485
+ "epoch": 47.5,
486
+ "learning_rate": 1.08e-06,
487
+ "loss": 0.0121,
488
+ "step": 475
489
+ },
490
+ {
491
+ "epoch": 47.5,
492
+ "eval_accuracy": 0.3,
493
+ "eval_disk_space_total": 78.1898422241211,
494
+ "eval_disk_space_used": 40.403297424316406,
495
+ "eval_f1": 0.23444444444444446,
496
+ "eval_gpu_ram_allocated": 4.8430633544921875,
497
+ "eval_gpu_ram_cached": 7.046875,
498
+ "eval_gpu_ram_total": 39.56402587890625,
499
+ "eval_gpu_utilization": 37,
500
+ "eval_loss": 3.389730453491211,
501
+ "eval_precision": 0.2369047619047619,
502
+ "eval_recall": 0.3,
503
+ "eval_runtime": 0.2122,
504
+ "eval_samples_per_second": 94.239,
505
+ "eval_steps_per_second": 14.136,
506
+ "eval_system_ram_total": 83.48074722290039,
507
+ "eval_system_ram_used": 4.942131042480469,
508
+ "step": 475
509
+ },
510
+ {
511
+ "epoch": 50.0,
512
+ "learning_rate": 8e-08,
513
+ "loss": 0.0124,
514
+ "step": 500
515
+ },
516
+ {
517
+ "epoch": 50.0,
518
+ "eval_accuracy": 0.3,
519
+ "eval_disk_space_total": 78.1898422241211,
520
+ "eval_disk_space_used": 40.4033203125,
521
+ "eval_f1": 0.23444444444444446,
522
+ "eval_gpu_ram_allocated": 4.843059062957764,
523
+ "eval_gpu_ram_cached": 7.046875,
524
+ "eval_gpu_ram_total": 39.56402587890625,
525
+ "eval_gpu_utilization": 38,
526
+ "eval_loss": 3.3983218669891357,
527
+ "eval_precision": 0.2369047619047619,
528
+ "eval_recall": 0.3,
529
+ "eval_runtime": 0.2214,
530
+ "eval_samples_per_second": 90.351,
531
+ "eval_steps_per_second": 13.553,
532
+ "eval_system_ram_total": 83.48074722290039,
533
+ "eval_system_ram_used": 4.958106994628906,
534
+ "step": 500
535
  }
536
  ],
537
  "max_steps": 500,
538
  "num_train_epochs": 50,
539
+ "total_flos": 168180059293920.0,
540
  "trial_name": null,
541
  "trial_params": null
542
  }
{checkpoint-320 → checkpoint-500}/training_args.bin RENAMED
File without changes
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cd18743bdfde8be92fbd991f5cad7d7c0cd7aaca0583e919148be21d96ce8668
3
  size 737788917
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:70ba09d349c8c8ac319ad8ae7821294c459961bd17df3092a1a44adff6bd6a8c
3
  size 737788917
runs/Jul22_22-00-48_9cc823f43576/events.out.tfevents.1690063253.9cc823f43576.282.1 CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e999567e134d87b11c54fa290ab2e27a310e33b30d4b21ec4cb70c2181feb3b4
3
- size 19330
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31890ceef7ba0d7f63da813a8499776805624be35cb8f5759a0686848a9108d1
3
+ size 27398
checkpoint-330/training_args.bin → runs/Jul22_22-00-48_9cc823f43576/events.out.tfevents.1690063714.9cc823f43576.282.2 RENAMED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:47900edd0b53679608a0ede66451399bbfa5cd566fbc3709079b0e0cd194e43b
3
- size 4091
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e4404bb0eef3ad97244fe6018ae250a8e464657a68b3198d21200bcaaa5e6db4
3
+ size 1033