sheepy928 commited on
Commit
5a161d4
1 Parent(s): ddfdf23

Training in progress, step 2400, checkpoint

Browse files
checkpoint-2400/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sheepy928/default",
3
+ "architectures": [
4
+ "RobertaForSentimentClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "finetuning_task": "text-classification",
11
+ "hidden_act": "gelu",
12
+ "hidden_dropout_prob": 0.1,
13
+ "hidden_size": 768,
14
+ "id2label": {
15
+ "0": "0",
16
+ "1": "1",
17
+ "2": "2"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "0": 0,
23
+ "1": 1,
24
+ "2": 2
25
+ },
26
+ "layer_norm_eps": 1e-05,
27
+ "max_position_embeddings": 514,
28
+ "model_type": "roberta",
29
+ "num_attention_heads": 12,
30
+ "num_hidden_layers": 12,
31
+ "pad_token_id": 1,
32
+ "position_embedding_type": "absolute",
33
+ "problem_type": "single_label_classification",
34
+ "torch_dtype": "float32",
35
+ "transformers_version": "4.34.1",
36
+ "type_vocab_size": 1,
37
+ "use_cache": true,
38
+ "vocab_size": 50265
39
+ }
checkpoint-2400/merges.txt ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2400/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:398764b18b0a38843ed17563acb48c1777e417560821ef3ee0fc1196947bc232
3
+ size 997351674
checkpoint-2400/pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7db27505b41fce84ebfd39ded14ad559d80b417b28a7dbfe568ad8f6e816b1c6
3
+ size 498661166
checkpoint-2400/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24f1276398b5a038bd8d249541fc7c734b10913a7b6fd9515368d11c7d0ffda2
3
+ size 14244
checkpoint-2400/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2d99757b4d430ba06bba791e8a73547b0accd058a05117e17256351903bc2081
3
+ size 1064
checkpoint-2400/special_tokens_map.json ADDED
@@ -0,0 +1,51 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": true,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "cls_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": false
22
+ },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
+ "pad_token": {
31
+ "content": "<pad>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": false
36
+ },
37
+ "sep_token": {
38
+ "content": "</s>",
39
+ "lstrip": false,
40
+ "normalized": true,
41
+ "rstrip": false,
42
+ "single_word": false
43
+ },
44
+ "unk_token": {
45
+ "content": "<unk>",
46
+ "lstrip": false,
47
+ "normalized": true,
48
+ "rstrip": false,
49
+ "single_word": false
50
+ }
51
+ }
checkpoint-2400/tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
checkpoint-2400/tokenizer_config.json ADDED
@@ -0,0 +1,57 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_prefix_space": false,
3
+ "added_tokens_decoder": {
4
+ "0": {
5
+ "content": "<s>",
6
+ "lstrip": false,
7
+ "normalized": true,
8
+ "rstrip": false,
9
+ "single_word": false,
10
+ "special": true
11
+ },
12
+ "1": {
13
+ "content": "<pad>",
14
+ "lstrip": false,
15
+ "normalized": true,
16
+ "rstrip": false,
17
+ "single_word": false,
18
+ "special": true
19
+ },
20
+ "2": {
21
+ "content": "</s>",
22
+ "lstrip": false,
23
+ "normalized": true,
24
+ "rstrip": false,
25
+ "single_word": false,
26
+ "special": true
27
+ },
28
+ "3": {
29
+ "content": "<unk>",
30
+ "lstrip": false,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false,
34
+ "special": true
35
+ },
36
+ "50264": {
37
+ "content": "<mask>",
38
+ "lstrip": true,
39
+ "normalized": false,
40
+ "rstrip": false,
41
+ "single_word": false,
42
+ "special": true
43
+ }
44
+ },
45
+ "bos_token": "<s>",
46
+ "clean_up_tokenization_spaces": true,
47
+ "cls_token": "<s>",
48
+ "eos_token": "</s>",
49
+ "errors": "replace",
50
+ "mask_token": "<mask>",
51
+ "model_max_length": 512,
52
+ "pad_token": "<pad>",
53
+ "sep_token": "</s>",
54
+ "tokenizer_class": "RobertaTokenizer",
55
+ "trim_offsets": true,
56
+ "unk_token": "<unk>"
57
+ }
checkpoint-2400/trainer_state.json ADDED
@@ -0,0 +1,1563 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 12.76595744680851,
5
+ "eval_steps": 300,
6
+ "global_step": 2400,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.05,
13
+ "learning_rate": 5e-07,
14
+ "loss": 1.0874,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.11,
19
+ "learning_rate": 1e-06,
20
+ "loss": 0.9487,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.16,
25
+ "learning_rate": 1.5e-06,
26
+ "loss": 0.7586,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.21,
31
+ "learning_rate": 2e-06,
32
+ "loss": 0.7225,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.27,
37
+ "learning_rate": 2.5e-06,
38
+ "loss": 0.7364,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.32,
43
+ "learning_rate": 3e-06,
44
+ "loss": 0.7265,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.37,
49
+ "learning_rate": 3.5e-06,
50
+ "loss": 0.7267,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.43,
55
+ "learning_rate": 4e-06,
56
+ "loss": 0.5697,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.48,
61
+ "learning_rate": 4.5e-06,
62
+ "loss": 1.018,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.53,
67
+ "learning_rate": 5e-06,
68
+ "loss": 0.7875,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.59,
73
+ "learning_rate": 5.5e-06,
74
+ "loss": 0.8242,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.64,
79
+ "learning_rate": 6e-06,
80
+ "loss": 0.8034,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.69,
85
+ "learning_rate": 6.5e-06,
86
+ "loss": 0.7717,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.74,
91
+ "learning_rate": 7e-06,
92
+ "loss": 0.8337,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 0.8,
97
+ "learning_rate": 7.5e-06,
98
+ "loss": 0.6884,
99
+ "step": 150
100
+ },
101
+ {
102
+ "epoch": 0.85,
103
+ "learning_rate": 8e-06,
104
+ "loss": 0.9129,
105
+ "step": 160
106
+ },
107
+ {
108
+ "epoch": 0.9,
109
+ "learning_rate": 8.500000000000002e-06,
110
+ "loss": 0.5637,
111
+ "step": 170
112
+ },
113
+ {
114
+ "epoch": 0.96,
115
+ "learning_rate": 9e-06,
116
+ "loss": 1.0458,
117
+ "step": 180
118
+ },
119
+ {
120
+ "epoch": 1.01,
121
+ "learning_rate": 9.5e-06,
122
+ "loss": 0.9414,
123
+ "step": 190
124
+ },
125
+ {
126
+ "epoch": 1.06,
127
+ "learning_rate": 1e-05,
128
+ "loss": 0.6379,
129
+ "step": 200
130
+ },
131
+ {
132
+ "epoch": 1.12,
133
+ "learning_rate": 1.0500000000000001e-05,
134
+ "loss": 0.9249,
135
+ "step": 210
136
+ },
137
+ {
138
+ "epoch": 1.17,
139
+ "learning_rate": 1.1e-05,
140
+ "loss": 0.6944,
141
+ "step": 220
142
+ },
143
+ {
144
+ "epoch": 1.22,
145
+ "learning_rate": 1.15e-05,
146
+ "loss": 0.9221,
147
+ "step": 230
148
+ },
149
+ {
150
+ "epoch": 1.28,
151
+ "learning_rate": 1.2e-05,
152
+ "loss": 0.6475,
153
+ "step": 240
154
+ },
155
+ {
156
+ "epoch": 1.33,
157
+ "learning_rate": 1.25e-05,
158
+ "loss": 0.7748,
159
+ "step": 250
160
+ },
161
+ {
162
+ "epoch": 1.38,
163
+ "learning_rate": 1.3e-05,
164
+ "loss": 0.8705,
165
+ "step": 260
166
+ },
167
+ {
168
+ "epoch": 1.44,
169
+ "learning_rate": 1.35e-05,
170
+ "loss": 0.7737,
171
+ "step": 270
172
+ },
173
+ {
174
+ "epoch": 1.49,
175
+ "learning_rate": 1.4e-05,
176
+ "loss": 0.8643,
177
+ "step": 280
178
+ },
179
+ {
180
+ "epoch": 1.54,
181
+ "learning_rate": 1.4500000000000002e-05,
182
+ "loss": 0.8428,
183
+ "step": 290
184
+ },
185
+ {
186
+ "epoch": 1.6,
187
+ "learning_rate": 1.5e-05,
188
+ "loss": 0.6785,
189
+ "step": 300
190
+ },
191
+ {
192
+ "epoch": 1.6,
193
+ "eval_accuracy": 0.7386666666666667,
194
+ "eval_combined_score": 0.6626504648943422,
195
+ "eval_f1": 0.6276400817995911,
196
+ "eval_loss": 0.7930460572242737,
197
+ "eval_precision": 0.5456284444444445,
198
+ "eval_recall": 0.7386666666666667,
199
+ "eval_runtime": 6.0663,
200
+ "eval_samples_per_second": 247.266,
201
+ "eval_steps_per_second": 7.748,
202
+ "step": 300
203
+ },
204
+ {
205
+ "epoch": 1.65,
206
+ "learning_rate": 1.55e-05,
207
+ "loss": 0.6076,
208
+ "step": 310
209
+ },
210
+ {
211
+ "epoch": 1.7,
212
+ "learning_rate": 1.6e-05,
213
+ "loss": 0.5963,
214
+ "step": 320
215
+ },
216
+ {
217
+ "epoch": 1.76,
218
+ "learning_rate": 1.65e-05,
219
+ "loss": 0.6626,
220
+ "step": 330
221
+ },
222
+ {
223
+ "epoch": 1.81,
224
+ "learning_rate": 1.7000000000000003e-05,
225
+ "loss": 0.8379,
226
+ "step": 340
227
+ },
228
+ {
229
+ "epoch": 1.86,
230
+ "learning_rate": 1.7500000000000002e-05,
231
+ "loss": 0.8851,
232
+ "step": 350
233
+ },
234
+ {
235
+ "epoch": 1.91,
236
+ "learning_rate": 1.8e-05,
237
+ "loss": 0.7489,
238
+ "step": 360
239
+ },
240
+ {
241
+ "epoch": 1.97,
242
+ "learning_rate": 1.85e-05,
243
+ "loss": 0.7573,
244
+ "step": 370
245
+ },
246
+ {
247
+ "epoch": 2.02,
248
+ "learning_rate": 1.9e-05,
249
+ "loss": 0.8018,
250
+ "step": 380
251
+ },
252
+ {
253
+ "epoch": 2.07,
254
+ "learning_rate": 1.95e-05,
255
+ "loss": 0.6645,
256
+ "step": 390
257
+ },
258
+ {
259
+ "epoch": 2.13,
260
+ "learning_rate": 2e-05,
261
+ "loss": 0.8677,
262
+ "step": 400
263
+ },
264
+ {
265
+ "epoch": 2.18,
266
+ "learning_rate": 2.05e-05,
267
+ "loss": 0.7478,
268
+ "step": 410
269
+ },
270
+ {
271
+ "epoch": 2.23,
272
+ "learning_rate": 2.1000000000000002e-05,
273
+ "loss": 0.8551,
274
+ "step": 420
275
+ },
276
+ {
277
+ "epoch": 2.29,
278
+ "learning_rate": 2.1499999999999997e-05,
279
+ "loss": 0.9323,
280
+ "step": 430
281
+ },
282
+ {
283
+ "epoch": 2.34,
284
+ "learning_rate": 2.2e-05,
285
+ "loss": 0.7536,
286
+ "step": 440
287
+ },
288
+ {
289
+ "epoch": 2.39,
290
+ "learning_rate": 2.2499999999999998e-05,
291
+ "loss": 0.5336,
292
+ "step": 450
293
+ },
294
+ {
295
+ "epoch": 2.45,
296
+ "learning_rate": 2.3e-05,
297
+ "loss": 0.8955,
298
+ "step": 460
299
+ },
300
+ {
301
+ "epoch": 2.5,
302
+ "learning_rate": 2.3500000000000002e-05,
303
+ "loss": 0.7926,
304
+ "step": 470
305
+ },
306
+ {
307
+ "epoch": 2.55,
308
+ "learning_rate": 2.4e-05,
309
+ "loss": 0.5713,
310
+ "step": 480
311
+ },
312
+ {
313
+ "epoch": 2.61,
314
+ "learning_rate": 2.4500000000000003e-05,
315
+ "loss": 0.8568,
316
+ "step": 490
317
+ },
318
+ {
319
+ "epoch": 2.66,
320
+ "learning_rate": 2.5e-05,
321
+ "loss": 0.6348,
322
+ "step": 500
323
+ },
324
+ {
325
+ "epoch": 2.71,
326
+ "learning_rate": 2.55e-05,
327
+ "loss": 0.6223,
328
+ "step": 510
329
+ },
330
+ {
331
+ "epoch": 2.77,
332
+ "learning_rate": 2.6e-05,
333
+ "loss": 0.7579,
334
+ "step": 520
335
+ },
336
+ {
337
+ "epoch": 2.82,
338
+ "learning_rate": 2.65e-05,
339
+ "loss": 0.6325,
340
+ "step": 530
341
+ },
342
+ {
343
+ "epoch": 2.87,
344
+ "learning_rate": 2.7e-05,
345
+ "loss": 0.7276,
346
+ "step": 540
347
+ },
348
+ {
349
+ "epoch": 2.93,
350
+ "learning_rate": 2.75e-05,
351
+ "loss": 0.8766,
352
+ "step": 550
353
+ },
354
+ {
355
+ "epoch": 2.98,
356
+ "learning_rate": 2.8e-05,
357
+ "loss": 0.7107,
358
+ "step": 560
359
+ },
360
+ {
361
+ "epoch": 3.03,
362
+ "learning_rate": 2.85e-05,
363
+ "loss": 0.5904,
364
+ "step": 570
365
+ },
366
+ {
367
+ "epoch": 3.09,
368
+ "learning_rate": 2.9000000000000004e-05,
369
+ "loss": 0.7125,
370
+ "step": 580
371
+ },
372
+ {
373
+ "epoch": 3.14,
374
+ "learning_rate": 2.95e-05,
375
+ "loss": 0.755,
376
+ "step": 590
377
+ },
378
+ {
379
+ "epoch": 3.19,
380
+ "learning_rate": 3e-05,
381
+ "loss": 0.5583,
382
+ "step": 600
383
+ },
384
+ {
385
+ "epoch": 3.19,
386
+ "eval_accuracy": 0.7613333333333333,
387
+ "eval_combined_score": 0.7403762607674487,
388
+ "eval_f1": 0.7316181693637903,
389
+ "eval_loss": 0.6910097599029541,
390
+ "eval_precision": 0.7072202070393374,
391
+ "eval_recall": 0.7613333333333333,
392
+ "eval_runtime": 6.0882,
393
+ "eval_samples_per_second": 246.376,
394
+ "eval_steps_per_second": 7.72,
395
+ "step": 600
396
+ },
397
+ {
398
+ "epoch": 3.24,
399
+ "learning_rate": 3.05e-05,
400
+ "loss": 0.6527,
401
+ "step": 610
402
+ },
403
+ {
404
+ "epoch": 3.3,
405
+ "learning_rate": 3.1e-05,
406
+ "loss": 0.7676,
407
+ "step": 620
408
+ },
409
+ {
410
+ "epoch": 3.35,
411
+ "learning_rate": 3.15e-05,
412
+ "loss": 0.766,
413
+ "step": 630
414
+ },
415
+ {
416
+ "epoch": 3.4,
417
+ "learning_rate": 3.2e-05,
418
+ "loss": 0.6217,
419
+ "step": 640
420
+ },
421
+ {
422
+ "epoch": 3.46,
423
+ "learning_rate": 3.2500000000000004e-05,
424
+ "loss": 0.5405,
425
+ "step": 650
426
+ },
427
+ {
428
+ "epoch": 3.51,
429
+ "learning_rate": 3.3e-05,
430
+ "loss": 0.7951,
431
+ "step": 660
432
+ },
433
+ {
434
+ "epoch": 3.56,
435
+ "learning_rate": 3.35e-05,
436
+ "loss": 0.552,
437
+ "step": 670
438
+ },
439
+ {
440
+ "epoch": 3.62,
441
+ "learning_rate": 3.4000000000000007e-05,
442
+ "loss": 0.6521,
443
+ "step": 680
444
+ },
445
+ {
446
+ "epoch": 3.67,
447
+ "learning_rate": 3.4500000000000005e-05,
448
+ "loss": 0.6814,
449
+ "step": 690
450
+ },
451
+ {
452
+ "epoch": 3.72,
453
+ "learning_rate": 3.5000000000000004e-05,
454
+ "loss": 0.3992,
455
+ "step": 700
456
+ },
457
+ {
458
+ "epoch": 3.78,
459
+ "learning_rate": 3.5499999999999996e-05,
460
+ "loss": 0.7018,
461
+ "step": 710
462
+ },
463
+ {
464
+ "epoch": 3.83,
465
+ "learning_rate": 3.6e-05,
466
+ "loss": 0.664,
467
+ "step": 720
468
+ },
469
+ {
470
+ "epoch": 3.88,
471
+ "learning_rate": 3.65e-05,
472
+ "loss": 0.6891,
473
+ "step": 730
474
+ },
475
+ {
476
+ "epoch": 3.94,
477
+ "learning_rate": 3.7e-05,
478
+ "loss": 0.6063,
479
+ "step": 740
480
+ },
481
+ {
482
+ "epoch": 3.99,
483
+ "learning_rate": 3.75e-05,
484
+ "loss": 0.6543,
485
+ "step": 750
486
+ },
487
+ {
488
+ "epoch": 4.04,
489
+ "learning_rate": 3.8e-05,
490
+ "loss": 0.5638,
491
+ "step": 760
492
+ },
493
+ {
494
+ "epoch": 4.1,
495
+ "learning_rate": 3.85e-05,
496
+ "loss": 0.6984,
497
+ "step": 770
498
+ },
499
+ {
500
+ "epoch": 4.15,
501
+ "learning_rate": 3.9e-05,
502
+ "loss": 0.7148,
503
+ "step": 780
504
+ },
505
+ {
506
+ "epoch": 4.2,
507
+ "learning_rate": 3.95e-05,
508
+ "loss": 0.4915,
509
+ "step": 790
510
+ },
511
+ {
512
+ "epoch": 4.26,
513
+ "learning_rate": 4e-05,
514
+ "loss": 0.648,
515
+ "step": 800
516
+ },
517
+ {
518
+ "epoch": 4.31,
519
+ "learning_rate": 4.05e-05,
520
+ "loss": 0.4527,
521
+ "step": 810
522
+ },
523
+ {
524
+ "epoch": 4.36,
525
+ "learning_rate": 4.1e-05,
526
+ "loss": 0.551,
527
+ "step": 820
528
+ },
529
+ {
530
+ "epoch": 4.41,
531
+ "learning_rate": 4.1500000000000006e-05,
532
+ "loss": 0.5909,
533
+ "step": 830
534
+ },
535
+ {
536
+ "epoch": 4.47,
537
+ "learning_rate": 4.2000000000000004e-05,
538
+ "loss": 0.603,
539
+ "step": 840
540
+ },
541
+ {
542
+ "epoch": 4.52,
543
+ "learning_rate": 4.25e-05,
544
+ "loss": 1.0745,
545
+ "step": 850
546
+ },
547
+ {
548
+ "epoch": 4.57,
549
+ "learning_rate": 4.2999999999999995e-05,
550
+ "loss": 0.6697,
551
+ "step": 860
552
+ },
553
+ {
554
+ "epoch": 4.63,
555
+ "learning_rate": 4.35e-05,
556
+ "loss": 0.8374,
557
+ "step": 870
558
+ },
559
+ {
560
+ "epoch": 4.68,
561
+ "learning_rate": 4.4e-05,
562
+ "loss": 0.4891,
563
+ "step": 880
564
+ },
565
+ {
566
+ "epoch": 4.73,
567
+ "learning_rate": 4.45e-05,
568
+ "loss": 0.5956,
569
+ "step": 890
570
+ },
571
+ {
572
+ "epoch": 4.79,
573
+ "learning_rate": 4.4999999999999996e-05,
574
+ "loss": 0.7857,
575
+ "step": 900
576
+ },
577
+ {
578
+ "epoch": 4.79,
579
+ "eval_accuracy": 0.7386666666666667,
580
+ "eval_combined_score": 0.6626504648943422,
581
+ "eval_f1": 0.6276400817995911,
582
+ "eval_loss": 0.6514685153961182,
583
+ "eval_precision": 0.5456284444444445,
584
+ "eval_recall": 0.7386666666666667,
585
+ "eval_runtime": 5.9473,
586
+ "eval_samples_per_second": 252.216,
587
+ "eval_steps_per_second": 7.903,
588
+ "step": 900
589
+ },
590
+ {
591
+ "epoch": 4.84,
592
+ "learning_rate": 4.55e-05,
593
+ "loss": 0.8566,
594
+ "step": 910
595
+ },
596
+ {
597
+ "epoch": 4.89,
598
+ "learning_rate": 4.6e-05,
599
+ "loss": 0.4698,
600
+ "step": 920
601
+ },
602
+ {
603
+ "epoch": 4.95,
604
+ "learning_rate": 4.65e-05,
605
+ "loss": 0.7224,
606
+ "step": 930
607
+ },
608
+ {
609
+ "epoch": 5.0,
610
+ "learning_rate": 4.7000000000000004e-05,
611
+ "loss": 0.5879,
612
+ "step": 940
613
+ },
614
+ {
615
+ "epoch": 5.05,
616
+ "learning_rate": 4.75e-05,
617
+ "loss": 0.3592,
618
+ "step": 950
619
+ },
620
+ {
621
+ "epoch": 5.11,
622
+ "learning_rate": 4.8e-05,
623
+ "loss": 0.513,
624
+ "step": 960
625
+ },
626
+ {
627
+ "epoch": 5.16,
628
+ "learning_rate": 4.85e-05,
629
+ "loss": 0.6167,
630
+ "step": 970
631
+ },
632
+ {
633
+ "epoch": 5.21,
634
+ "learning_rate": 4.9000000000000005e-05,
635
+ "loss": 0.5706,
636
+ "step": 980
637
+ },
638
+ {
639
+ "epoch": 5.27,
640
+ "learning_rate": 4.9500000000000004e-05,
641
+ "loss": 0.5523,
642
+ "step": 990
643
+ },
644
+ {
645
+ "epoch": 5.32,
646
+ "learning_rate": 5e-05,
647
+ "loss": 0.6565,
648
+ "step": 1000
649
+ },
650
+ {
651
+ "epoch": 5.37,
652
+ "learning_rate": 5.05e-05,
653
+ "loss": 0.4634,
654
+ "step": 1010
655
+ },
656
+ {
657
+ "epoch": 5.43,
658
+ "learning_rate": 5.1e-05,
659
+ "loss": 0.634,
660
+ "step": 1020
661
+ },
662
+ {
663
+ "epoch": 5.48,
664
+ "learning_rate": 5.15e-05,
665
+ "loss": 0.5472,
666
+ "step": 1030
667
+ },
668
+ {
669
+ "epoch": 5.53,
670
+ "learning_rate": 5.2e-05,
671
+ "loss": 0.595,
672
+ "step": 1040
673
+ },
674
+ {
675
+ "epoch": 5.59,
676
+ "learning_rate": 5.25e-05,
677
+ "loss": 0.4889,
678
+ "step": 1050
679
+ },
680
+ {
681
+ "epoch": 5.64,
682
+ "learning_rate": 5.3e-05,
683
+ "loss": 0.5884,
684
+ "step": 1060
685
+ },
686
+ {
687
+ "epoch": 5.69,
688
+ "learning_rate": 5.35e-05,
689
+ "loss": 0.4149,
690
+ "step": 1070
691
+ },
692
+ {
693
+ "epoch": 5.74,
694
+ "learning_rate": 5.4e-05,
695
+ "loss": 0.7573,
696
+ "step": 1080
697
+ },
698
+ {
699
+ "epoch": 5.8,
700
+ "learning_rate": 5.45e-05,
701
+ "loss": 0.599,
702
+ "step": 1090
703
+ },
704
+ {
705
+ "epoch": 5.85,
706
+ "learning_rate": 5.5e-05,
707
+ "loss": 0.366,
708
+ "step": 1100
709
+ },
710
+ {
711
+ "epoch": 5.9,
712
+ "learning_rate": 5.55e-05,
713
+ "loss": 0.4906,
714
+ "step": 1110
715
+ },
716
+ {
717
+ "epoch": 5.96,
718
+ "learning_rate": 5.6e-05,
719
+ "loss": 0.4378,
720
+ "step": 1120
721
+ },
722
+ {
723
+ "epoch": 6.01,
724
+ "learning_rate": 5.6500000000000005e-05,
725
+ "loss": 0.4886,
726
+ "step": 1130
727
+ },
728
+ {
729
+ "epoch": 6.06,
730
+ "learning_rate": 5.7e-05,
731
+ "loss": 0.342,
732
+ "step": 1140
733
+ },
734
+ {
735
+ "epoch": 6.12,
736
+ "learning_rate": 5.75e-05,
737
+ "loss": 0.7803,
738
+ "step": 1150
739
+ },
740
+ {
741
+ "epoch": 6.17,
742
+ "learning_rate": 5.800000000000001e-05,
743
+ "loss": 0.3435,
744
+ "step": 1160
745
+ },
746
+ {
747
+ "epoch": 6.22,
748
+ "learning_rate": 5.8500000000000006e-05,
749
+ "loss": 0.6149,
750
+ "step": 1170
751
+ },
752
+ {
753
+ "epoch": 6.28,
754
+ "learning_rate": 5.9e-05,
755
+ "loss": 0.3026,
756
+ "step": 1180
757
+ },
758
+ {
759
+ "epoch": 6.33,
760
+ "learning_rate": 5.9499999999999996e-05,
761
+ "loss": 0.9387,
762
+ "step": 1190
763
+ },
764
+ {
765
+ "epoch": 6.38,
766
+ "learning_rate": 6e-05,
767
+ "loss": 0.6309,
768
+ "step": 1200
769
+ },
770
+ {
771
+ "epoch": 6.38,
772
+ "eval_accuracy": 0.848,
773
+ "eval_combined_score": 0.8403047916631027,
774
+ "eval_f1": 0.8270449473875959,
775
+ "eval_loss": 0.5592399835586548,
776
+ "eval_precision": 0.8381742192648148,
777
+ "eval_recall": 0.848,
778
+ "eval_runtime": 5.9701,
779
+ "eval_samples_per_second": 251.251,
780
+ "eval_steps_per_second": 7.873,
781
+ "step": 1200
782
+ },
783
+ {
784
+ "epoch": 6.44,
785
+ "learning_rate": 6.05e-05,
786
+ "loss": 0.4672,
787
+ "step": 1210
788
+ },
789
+ {
790
+ "epoch": 6.49,
791
+ "learning_rate": 6.1e-05,
792
+ "loss": 0.4909,
793
+ "step": 1220
794
+ },
795
+ {
796
+ "epoch": 6.54,
797
+ "learning_rate": 6.15e-05,
798
+ "loss": 0.5007,
799
+ "step": 1230
800
+ },
801
+ {
802
+ "epoch": 6.6,
803
+ "learning_rate": 6.2e-05,
804
+ "loss": 0.4318,
805
+ "step": 1240
806
+ },
807
+ {
808
+ "epoch": 6.65,
809
+ "learning_rate": 6.25e-05,
810
+ "loss": 0.3623,
811
+ "step": 1250
812
+ },
813
+ {
814
+ "epoch": 6.7,
815
+ "learning_rate": 6.3e-05,
816
+ "loss": 0.3294,
817
+ "step": 1260
818
+ },
819
+ {
820
+ "epoch": 6.76,
821
+ "learning_rate": 6.35e-05,
822
+ "loss": 0.7501,
823
+ "step": 1270
824
+ },
825
+ {
826
+ "epoch": 6.81,
827
+ "learning_rate": 6.4e-05,
828
+ "loss": 0.6436,
829
+ "step": 1280
830
+ },
831
+ {
832
+ "epoch": 6.86,
833
+ "learning_rate": 6.450000000000001e-05,
834
+ "loss": 0.525,
835
+ "step": 1290
836
+ },
837
+ {
838
+ "epoch": 6.91,
839
+ "learning_rate": 6.500000000000001e-05,
840
+ "loss": 0.6047,
841
+ "step": 1300
842
+ },
843
+ {
844
+ "epoch": 6.97,
845
+ "learning_rate": 6.55e-05,
846
+ "loss": 0.3636,
847
+ "step": 1310
848
+ },
849
+ {
850
+ "epoch": 7.02,
851
+ "learning_rate": 6.6e-05,
852
+ "loss": 0.5634,
853
+ "step": 1320
854
+ },
855
+ {
856
+ "epoch": 7.07,
857
+ "learning_rate": 6.65e-05,
858
+ "loss": 0.3846,
859
+ "step": 1330
860
+ },
861
+ {
862
+ "epoch": 7.13,
863
+ "learning_rate": 6.7e-05,
864
+ "loss": 0.8436,
865
+ "step": 1340
866
+ },
867
+ {
868
+ "epoch": 7.18,
869
+ "learning_rate": 6.75e-05,
870
+ "loss": 0.4762,
871
+ "step": 1350
872
+ },
873
+ {
874
+ "epoch": 7.23,
875
+ "learning_rate": 6.800000000000001e-05,
876
+ "loss": 0.5856,
877
+ "step": 1360
878
+ },
879
+ {
880
+ "epoch": 7.29,
881
+ "learning_rate": 6.850000000000001e-05,
882
+ "loss": 0.4042,
883
+ "step": 1370
884
+ },
885
+ {
886
+ "epoch": 7.34,
887
+ "learning_rate": 6.900000000000001e-05,
888
+ "loss": 0.475,
889
+ "step": 1380
890
+ },
891
+ {
892
+ "epoch": 7.39,
893
+ "learning_rate": 6.950000000000001e-05,
894
+ "loss": 0.3535,
895
+ "step": 1390
896
+ },
897
+ {
898
+ "epoch": 7.45,
899
+ "learning_rate": 7.000000000000001e-05,
900
+ "loss": 0.2399,
901
+ "step": 1400
902
+ },
903
+ {
904
+ "epoch": 7.5,
905
+ "learning_rate": 7.049999999999999e-05,
906
+ "loss": 0.316,
907
+ "step": 1410
908
+ },
909
+ {
910
+ "epoch": 7.55,
911
+ "learning_rate": 7.095e-05,
912
+ "loss": 0.438,
913
+ "step": 1420
914
+ },
915
+ {
916
+ "epoch": 7.61,
917
+ "learning_rate": 7.145e-05,
918
+ "loss": 0.2673,
919
+ "step": 1430
920
+ },
921
+ {
922
+ "epoch": 7.66,
923
+ "learning_rate": 7.195e-05,
924
+ "loss": 0.4286,
925
+ "step": 1440
926
+ },
927
+ {
928
+ "epoch": 7.71,
929
+ "learning_rate": 7.245e-05,
930
+ "loss": 0.3655,
931
+ "step": 1450
932
+ },
933
+ {
934
+ "epoch": 7.77,
935
+ "learning_rate": 7.295e-05,
936
+ "loss": 0.434,
937
+ "step": 1460
938
+ },
939
+ {
940
+ "epoch": 7.82,
941
+ "learning_rate": 7.345e-05,
942
+ "loss": 0.6143,
943
+ "step": 1470
944
+ },
945
+ {
946
+ "epoch": 7.87,
947
+ "learning_rate": 7.395000000000001e-05,
948
+ "loss": 0.5471,
949
+ "step": 1480
950
+ },
951
+ {
952
+ "epoch": 7.93,
953
+ "learning_rate": 7.445000000000001e-05,
954
+ "loss": 0.299,
955
+ "step": 1490
956
+ },
957
+ {
958
+ "epoch": 7.98,
959
+ "learning_rate": 7.495e-05,
960
+ "loss": 0.2216,
961
+ "step": 1500
962
+ },
963
+ {
964
+ "epoch": 7.98,
965
+ "eval_accuracy": 0.8773333333333333,
966
+ "eval_combined_score": 0.8618647537688802,
967
+ "eval_f1": 0.8431508113173474,
968
+ "eval_loss": 0.5708244442939758,
969
+ "eval_precision": 0.8496415370915067,
970
+ "eval_recall": 0.8773333333333333,
971
+ "eval_runtime": 6.0286,
972
+ "eval_samples_per_second": 248.816,
973
+ "eval_steps_per_second": 7.796,
974
+ "step": 1500
975
+ },
976
+ {
977
+ "epoch": 8.03,
978
+ "learning_rate": 7.545e-05,
979
+ "loss": 0.5517,
980
+ "step": 1510
981
+ },
982
+ {
983
+ "epoch": 8.09,
984
+ "learning_rate": 7.595e-05,
985
+ "loss": 0.3237,
986
+ "step": 1520
987
+ },
988
+ {
989
+ "epoch": 8.14,
990
+ "learning_rate": 7.645e-05,
991
+ "loss": 0.3945,
992
+ "step": 1530
993
+ },
994
+ {
995
+ "epoch": 8.19,
996
+ "learning_rate": 7.695e-05,
997
+ "loss": 0.3996,
998
+ "step": 1540
999
+ },
1000
+ {
1001
+ "epoch": 8.24,
1002
+ "learning_rate": 7.745000000000001e-05,
1003
+ "loss": 0.3644,
1004
+ "step": 1550
1005
+ },
1006
+ {
1007
+ "epoch": 8.3,
1008
+ "learning_rate": 7.795000000000001e-05,
1009
+ "loss": 0.6767,
1010
+ "step": 1560
1011
+ },
1012
+ {
1013
+ "epoch": 8.35,
1014
+ "learning_rate": 7.845000000000001e-05,
1015
+ "loss": 0.434,
1016
+ "step": 1570
1017
+ },
1018
+ {
1019
+ "epoch": 8.4,
1020
+ "learning_rate": 7.895000000000001e-05,
1021
+ "loss": 0.4047,
1022
+ "step": 1580
1023
+ },
1024
+ {
1025
+ "epoch": 8.46,
1026
+ "learning_rate": 7.945e-05,
1027
+ "loss": 0.3844,
1028
+ "step": 1590
1029
+ },
1030
+ {
1031
+ "epoch": 8.51,
1032
+ "learning_rate": 7.994999999999999e-05,
1033
+ "loss": 0.2656,
1034
+ "step": 1600
1035
+ },
1036
+ {
1037
+ "epoch": 8.56,
1038
+ "learning_rate": 8.044999999999999e-05,
1039
+ "loss": 0.3318,
1040
+ "step": 1610
1041
+ },
1042
+ {
1043
+ "epoch": 8.62,
1044
+ "learning_rate": 8.095e-05,
1045
+ "loss": 0.4627,
1046
+ "step": 1620
1047
+ },
1048
+ {
1049
+ "epoch": 8.67,
1050
+ "learning_rate": 8.145e-05,
1051
+ "loss": 0.6657,
1052
+ "step": 1630
1053
+ },
1054
+ {
1055
+ "epoch": 8.72,
1056
+ "learning_rate": 8.195e-05,
1057
+ "loss": 0.353,
1058
+ "step": 1640
1059
+ },
1060
+ {
1061
+ "epoch": 8.78,
1062
+ "learning_rate": 8.245e-05,
1063
+ "loss": 0.4543,
1064
+ "step": 1650
1065
+ },
1066
+ {
1067
+ "epoch": 8.83,
1068
+ "learning_rate": 8.295e-05,
1069
+ "loss": 0.4121,
1070
+ "step": 1660
1071
+ },
1072
+ {
1073
+ "epoch": 8.88,
1074
+ "learning_rate": 8.340000000000001e-05,
1075
+ "loss": 0.2792,
1076
+ "step": 1670
1077
+ },
1078
+ {
1079
+ "epoch": 8.94,
1080
+ "learning_rate": 8.39e-05,
1081
+ "loss": 0.3618,
1082
+ "step": 1680
1083
+ },
1084
+ {
1085
+ "epoch": 8.99,
1086
+ "learning_rate": 8.44e-05,
1087
+ "loss": 0.4922,
1088
+ "step": 1690
1089
+ },
1090
+ {
1091
+ "epoch": 9.04,
1092
+ "learning_rate": 8.49e-05,
1093
+ "loss": 0.6822,
1094
+ "step": 1700
1095
+ },
1096
+ {
1097
+ "epoch": 9.1,
1098
+ "learning_rate": 8.54e-05,
1099
+ "loss": 0.4936,
1100
+ "step": 1710
1101
+ },
1102
+ {
1103
+ "epoch": 9.15,
1104
+ "learning_rate": 8.59e-05,
1105
+ "loss": 0.4652,
1106
+ "step": 1720
1107
+ },
1108
+ {
1109
+ "epoch": 9.2,
1110
+ "learning_rate": 8.640000000000001e-05,
1111
+ "loss": 0.3529,
1112
+ "step": 1730
1113
+ },
1114
+ {
1115
+ "epoch": 9.26,
1116
+ "learning_rate": 8.690000000000001e-05,
1117
+ "loss": 0.5115,
1118
+ "step": 1740
1119
+ },
1120
+ {
1121
+ "epoch": 9.31,
1122
+ "learning_rate": 8.740000000000001e-05,
1123
+ "loss": 0.5859,
1124
+ "step": 1750
1125
+ },
1126
+ {
1127
+ "epoch": 9.36,
1128
+ "learning_rate": 8.790000000000001e-05,
1129
+ "loss": 0.5069,
1130
+ "step": 1760
1131
+ },
1132
+ {
1133
+ "epoch": 9.41,
1134
+ "learning_rate": 8.840000000000001e-05,
1135
+ "loss": 0.2584,
1136
+ "step": 1770
1137
+ },
1138
+ {
1139
+ "epoch": 9.47,
1140
+ "learning_rate": 8.885e-05,
1141
+ "loss": 0.313,
1142
+ "step": 1780
1143
+ },
1144
+ {
1145
+ "epoch": 9.52,
1146
+ "learning_rate": 8.935e-05,
1147
+ "loss": 0.4103,
1148
+ "step": 1790
1149
+ },
1150
+ {
1151
+ "epoch": 9.57,
1152
+ "learning_rate": 8.985e-05,
1153
+ "loss": 0.3214,
1154
+ "step": 1800
1155
+ },
1156
+ {
1157
+ "epoch": 9.57,
1158
+ "eval_accuracy": 0.896,
1159
+ "eval_combined_score": 0.8819938675326909,
1160
+ "eval_f1": 0.8583787127091727,
1161
+ "eval_loss": 0.45495954155921936,
1162
+ "eval_precision": 0.8775967574215913,
1163
+ "eval_recall": 0.896,
1164
+ "eval_runtime": 6.1706,
1165
+ "eval_samples_per_second": 243.087,
1166
+ "eval_steps_per_second": 7.617,
1167
+ "step": 1800
1168
+ },
1169
+ {
1170
+ "epoch": 9.63,
1171
+ "learning_rate": 9.035e-05,
1172
+ "loss": 0.5097,
1173
+ "step": 1810
1174
+ },
1175
+ {
1176
+ "epoch": 9.68,
1177
+ "learning_rate": 9.085e-05,
1178
+ "loss": 0.4306,
1179
+ "step": 1820
1180
+ },
1181
+ {
1182
+ "epoch": 9.73,
1183
+ "learning_rate": 9.135e-05,
1184
+ "loss": 0.2573,
1185
+ "step": 1830
1186
+ },
1187
+ {
1188
+ "epoch": 9.79,
1189
+ "learning_rate": 9.185e-05,
1190
+ "loss": 0.3534,
1191
+ "step": 1840
1192
+ },
1193
+ {
1194
+ "epoch": 9.84,
1195
+ "learning_rate": 9.235000000000001e-05,
1196
+ "loss": 0.5012,
1197
+ "step": 1850
1198
+ },
1199
+ {
1200
+ "epoch": 9.89,
1201
+ "learning_rate": 9.285000000000001e-05,
1202
+ "loss": 0.7057,
1203
+ "step": 1860
1204
+ },
1205
+ {
1206
+ "epoch": 9.95,
1207
+ "learning_rate": 9.335e-05,
1208
+ "loss": 0.5385,
1209
+ "step": 1870
1210
+ },
1211
+ {
1212
+ "epoch": 10.0,
1213
+ "learning_rate": 9.385e-05,
1214
+ "loss": 0.2633,
1215
+ "step": 1880
1216
+ },
1217
+ {
1218
+ "epoch": 10.05,
1219
+ "learning_rate": 9.435e-05,
1220
+ "loss": 0.3853,
1221
+ "step": 1890
1222
+ },
1223
+ {
1224
+ "epoch": 10.11,
1225
+ "learning_rate": 9.485e-05,
1226
+ "loss": 0.4354,
1227
+ "step": 1900
1228
+ },
1229
+ {
1230
+ "epoch": 10.16,
1231
+ "learning_rate": 9.535e-05,
1232
+ "loss": 0.7459,
1233
+ "step": 1910
1234
+ },
1235
+ {
1236
+ "epoch": 10.21,
1237
+ "learning_rate": 9.585000000000001e-05,
1238
+ "loss": 0.2937,
1239
+ "step": 1920
1240
+ },
1241
+ {
1242
+ "epoch": 10.27,
1243
+ "learning_rate": 9.635000000000001e-05,
1244
+ "loss": 0.2932,
1245
+ "step": 1930
1246
+ },
1247
+ {
1248
+ "epoch": 10.32,
1249
+ "learning_rate": 9.685000000000001e-05,
1250
+ "loss": 0.4256,
1251
+ "step": 1940
1252
+ },
1253
+ {
1254
+ "epoch": 10.37,
1255
+ "learning_rate": 9.735000000000001e-05,
1256
+ "loss": 0.4336,
1257
+ "step": 1950
1258
+ },
1259
+ {
1260
+ "epoch": 10.43,
1261
+ "learning_rate": 9.785e-05,
1262
+ "loss": 0.4684,
1263
+ "step": 1960
1264
+ },
1265
+ {
1266
+ "epoch": 10.48,
1267
+ "learning_rate": 9.835e-05,
1268
+ "loss": 0.5437,
1269
+ "step": 1970
1270
+ },
1271
+ {
1272
+ "epoch": 10.53,
1273
+ "learning_rate": 9.884999999999999e-05,
1274
+ "loss": 0.3036,
1275
+ "step": 1980
1276
+ },
1277
+ {
1278
+ "epoch": 10.59,
1279
+ "learning_rate": 9.935e-05,
1280
+ "loss": 0.1528,
1281
+ "step": 1990
1282
+ },
1283
+ {
1284
+ "epoch": 10.64,
1285
+ "learning_rate": 9.985e-05,
1286
+ "loss": 0.8779,
1287
+ "step": 2000
1288
+ },
1289
+ {
1290
+ "epoch": 10.69,
1291
+ "learning_rate": 0.00010035,
1292
+ "loss": 0.53,
1293
+ "step": 2010
1294
+ },
1295
+ {
1296
+ "epoch": 10.74,
1297
+ "learning_rate": 0.00010085,
1298
+ "loss": 0.2656,
1299
+ "step": 2020
1300
+ },
1301
+ {
1302
+ "epoch": 10.8,
1303
+ "learning_rate": 0.00010135,
1304
+ "loss": 0.3895,
1305
+ "step": 2030
1306
+ },
1307
+ {
1308
+ "epoch": 10.85,
1309
+ "learning_rate": 0.00010185,
1310
+ "loss": 0.5943,
1311
+ "step": 2040
1312
+ },
1313
+ {
1314
+ "epoch": 10.9,
1315
+ "learning_rate": 0.00010235,
1316
+ "loss": 0.5021,
1317
+ "step": 2050
1318
+ },
1319
+ {
1320
+ "epoch": 10.96,
1321
+ "learning_rate": 0.00010284999999999999,
1322
+ "loss": 0.3804,
1323
+ "step": 2060
1324
+ },
1325
+ {
1326
+ "epoch": 11.01,
1327
+ "learning_rate": 0.00010335,
1328
+ "loss": 0.2204,
1329
+ "step": 2070
1330
+ },
1331
+ {
1332
+ "epoch": 11.06,
1333
+ "learning_rate": 0.00010385,
1334
+ "loss": 0.4421,
1335
+ "step": 2080
1336
+ },
1337
+ {
1338
+ "epoch": 11.12,
1339
+ "learning_rate": 0.0001043,
1340
+ "loss": 1.0409,
1341
+ "step": 2090
1342
+ },
1343
+ {
1344
+ "epoch": 11.17,
1345
+ "learning_rate": 0.00010480000000000001,
1346
+ "loss": 0.7521,
1347
+ "step": 2100
1348
+ },
1349
+ {
1350
+ "epoch": 11.17,
1351
+ "eval_accuracy": 0.884,
1352
+ "eval_combined_score": 0.8540521114926303,
1353
+ "eval_f1": 0.8422803386258884,
1354
+ "eval_loss": 0.3819296956062317,
1355
+ "eval_precision": 0.8059281073446327,
1356
+ "eval_recall": 0.884,
1357
+ "eval_runtime": 6.0267,
1358
+ "eval_samples_per_second": 248.894,
1359
+ "eval_steps_per_second": 7.799,
1360
+ "step": 2100
1361
+ },
1362
+ {
1363
+ "epoch": 11.22,
1364
+ "learning_rate": 0.00010530000000000001,
1365
+ "loss": 0.3117,
1366
+ "step": 2110
1367
+ },
1368
+ {
1369
+ "epoch": 11.28,
1370
+ "learning_rate": 0.00010580000000000001,
1371
+ "loss": 0.4548,
1372
+ "step": 2120
1373
+ },
1374
+ {
1375
+ "epoch": 11.33,
1376
+ "learning_rate": 0.00010630000000000001,
1377
+ "loss": 0.484,
1378
+ "step": 2130
1379
+ },
1380
+ {
1381
+ "epoch": 11.38,
1382
+ "learning_rate": 0.00010680000000000001,
1383
+ "loss": 0.4479,
1384
+ "step": 2140
1385
+ },
1386
+ {
1387
+ "epoch": 11.44,
1388
+ "learning_rate": 0.0001073,
1389
+ "loss": 0.3099,
1390
+ "step": 2150
1391
+ },
1392
+ {
1393
+ "epoch": 11.49,
1394
+ "learning_rate": 0.0001078,
1395
+ "loss": 0.4728,
1396
+ "step": 2160
1397
+ },
1398
+ {
1399
+ "epoch": 11.54,
1400
+ "learning_rate": 0.00010829999999999999,
1401
+ "loss": 0.5189,
1402
+ "step": 2170
1403
+ },
1404
+ {
1405
+ "epoch": 11.6,
1406
+ "learning_rate": 0.0001088,
1407
+ "loss": 0.5615,
1408
+ "step": 2180
1409
+ },
1410
+ {
1411
+ "epoch": 11.65,
1412
+ "learning_rate": 0.0001093,
1413
+ "loss": 0.536,
1414
+ "step": 2190
1415
+ },
1416
+ {
1417
+ "epoch": 11.7,
1418
+ "learning_rate": 0.0001098,
1419
+ "loss": 0.4329,
1420
+ "step": 2200
1421
+ },
1422
+ {
1423
+ "epoch": 11.76,
1424
+ "learning_rate": 0.0001103,
1425
+ "loss": 0.2304,
1426
+ "step": 2210
1427
+ },
1428
+ {
1429
+ "epoch": 11.81,
1430
+ "learning_rate": 0.0001108,
1431
+ "loss": 0.5139,
1432
+ "step": 2220
1433
+ },
1434
+ {
1435
+ "epoch": 11.86,
1436
+ "learning_rate": 0.0001113,
1437
+ "loss": 0.3353,
1438
+ "step": 2230
1439
+ },
1440
+ {
1441
+ "epoch": 11.91,
1442
+ "learning_rate": 0.0001118,
1443
+ "loss": 0.7493,
1444
+ "step": 2240
1445
+ },
1446
+ {
1447
+ "epoch": 11.97,
1448
+ "learning_rate": 0.0001123,
1449
+ "loss": 1.0483,
1450
+ "step": 2250
1451
+ },
1452
+ {
1453
+ "epoch": 12.02,
1454
+ "learning_rate": 0.0001128,
1455
+ "loss": 0.5422,
1456
+ "step": 2260
1457
+ },
1458
+ {
1459
+ "epoch": 12.07,
1460
+ "learning_rate": 0.0001133,
1461
+ "loss": 0.771,
1462
+ "step": 2270
1463
+ },
1464
+ {
1465
+ "epoch": 12.13,
1466
+ "learning_rate": 0.0001138,
1467
+ "loss": 0.841,
1468
+ "step": 2280
1469
+ },
1470
+ {
1471
+ "epoch": 12.18,
1472
+ "learning_rate": 0.0001143,
1473
+ "loss": 0.7316,
1474
+ "step": 2290
1475
+ },
1476
+ {
1477
+ "epoch": 12.23,
1478
+ "learning_rate": 0.0001148,
1479
+ "loss": 0.7394,
1480
+ "step": 2300
1481
+ },
1482
+ {
1483
+ "epoch": 12.29,
1484
+ "learning_rate": 0.0001153,
1485
+ "loss": 0.7084,
1486
+ "step": 2310
1487
+ },
1488
+ {
1489
+ "epoch": 12.34,
1490
+ "learning_rate": 0.0001158,
1491
+ "loss": 0.8033,
1492
+ "step": 2320
1493
+ },
1494
+ {
1495
+ "epoch": 12.39,
1496
+ "learning_rate": 0.00011630000000000001,
1497
+ "loss": 0.6616,
1498
+ "step": 2330
1499
+ },
1500
+ {
1501
+ "epoch": 12.45,
1502
+ "learning_rate": 0.0001168,
1503
+ "loss": 0.7601,
1504
+ "step": 2340
1505
+ },
1506
+ {
1507
+ "epoch": 12.5,
1508
+ "learning_rate": 0.0001173,
1509
+ "loss": 0.7986,
1510
+ "step": 2350
1511
+ },
1512
+ {
1513
+ "epoch": 12.55,
1514
+ "learning_rate": 0.0001178,
1515
+ "loss": 0.673,
1516
+ "step": 2360
1517
+ },
1518
+ {
1519
+ "epoch": 12.61,
1520
+ "learning_rate": 0.0001183,
1521
+ "loss": 0.6354,
1522
+ "step": 2370
1523
+ },
1524
+ {
1525
+ "epoch": 12.66,
1526
+ "learning_rate": 0.0001188,
1527
+ "loss": 0.617,
1528
+ "step": 2380
1529
+ },
1530
+ {
1531
+ "epoch": 12.71,
1532
+ "learning_rate": 0.0001193,
1533
+ "loss": 0.5532,
1534
+ "step": 2390
1535
+ },
1536
+ {
1537
+ "epoch": 12.77,
1538
+ "learning_rate": 0.00011980000000000001,
1539
+ "loss": 0.5048,
1540
+ "step": 2400
1541
+ },
1542
+ {
1543
+ "epoch": 12.77,
1544
+ "eval_accuracy": 0.7386666666666667,
1545
+ "eval_combined_score": 0.6626504648943422,
1546
+ "eval_f1": 0.6276400817995911,
1547
+ "eval_loss": 0.6582097411155701,
1548
+ "eval_precision": 0.5456284444444445,
1549
+ "eval_recall": 0.7386666666666667,
1550
+ "eval_runtime": 5.7153,
1551
+ "eval_samples_per_second": 262.451,
1552
+ "eval_steps_per_second": 8.223,
1553
+ "step": 2400
1554
+ }
1555
+ ],
1556
+ "logging_steps": 10,
1557
+ "max_steps": 5640,
1558
+ "num_train_epochs": 30,
1559
+ "save_steps": 300,
1560
+ "total_flos": 5039147999821824.0,
1561
+ "trial_name": null,
1562
+ "trial_params": null
1563
+ }
checkpoint-2400/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:05b42247af1886e7e4142c2daf8d1dc0efc9906f62aab884f8ebe56da5f0ce3e
3
+ size 4472
checkpoint-2400/vocab.json ADDED
The diff for this file is too large to render. See raw diff