Xmm committed on
Commit
e3cb863
·
1 Parent(s): 05f8090

Upload 13 files

Browse files
Files changed (6) hide show
  1. optimizer.pt +2 -2
  2. pytorch_model.bin +2 -2
  3. rng_state.pth +2 -2
  4. scheduler.pt +2 -2
  5. trainer_state.json +213 -37
  6. training_args.bin +2 -2
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2eebdf89f63210f98912665ade5f147d6f175d6bfec3e113caea30c6f21f5633
3
- size 4921022932
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:363505cec6ae12edbdab7483a99dd5dabd86465bc30901b8313cd858c4ce508b
3
+ size 4921023381
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d2e9ce5a959fab6c79e80f68ccd5df8d82151cc4baa44b3855c0186201979e6f
3
- size 2460468737
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0c75b9dea114d972be227686e7b86d98818144a94bd373a007d578b0c5bc3a3c
3
+ size 2460469182
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c355a37d69d0547cb02f29a95e2a61f87cc33a8ab0d5dc833ec916600db936e9
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:01e04213085dd5e20802745853457e0bc62c5da41fc6fdd60cd5770020ea2137
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:82fa38df0f02319ecaf4e43060afc1125beba62a6d3e8f96799a5efccae7cb3a
3
- size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c79ebef8c5a365223a47afc841c570f47e329d645c96a8d17b00f2936856436
3
+ size 1064
trainer_state.json CHANGED
@@ -1,67 +1,243 @@
1
  {
2
- "best_metric": 0.3143588602542877,
3
- "best_model_checkpoint": "./checkpoint-th/checkpoint-1500",
4
- "epoch": 0.2763894327106894,
5
  "eval_steps": 500,
6
- "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.09,
13
- "learning_rate": 1.963147226828819e-05,
14
- "loss": 4.6049,
15
  "step": 500
16
  },
17
  {
18
- "epoch": 0.09,
19
- "eval_bleu": 24.7133,
20
- "eval_gen_len": 33.9437,
21
- "eval_loss": 1.1478698253631592,
22
- "eval_runtime": 214.6895,
23
- "eval_samples_per_second": 4.714,
24
- "eval_steps_per_second": 1.178,
25
  "step": 500
26
  },
27
  {
28
- "epoch": 0.18,
29
- "learning_rate": 1.9262944536576377e-05,
30
- "loss": 0.3574,
31
  "step": 1000
32
  },
33
  {
34
- "epoch": 0.18,
35
- "eval_bleu": 25.1845,
36
- "eval_gen_len": 34.0662,
37
- "eval_loss": 0.31802815198898315,
38
- "eval_runtime": 214.1787,
39
- "eval_samples_per_second": 4.725,
40
- "eval_steps_per_second": 1.181,
41
  "step": 1000
42
  },
43
  {
44
- "epoch": 0.28,
45
- "learning_rate": 1.8894416804864568e-05,
46
- "loss": 0.1925,
47
  "step": 1500
48
  },
49
  {
50
- "epoch": 0.28,
51
- "eval_bleu": 25.5018,
52
- "eval_gen_len": 33.7075,
53
- "eval_loss": 0.3143588602542877,
54
- "eval_runtime": 211.869,
55
- "eval_samples_per_second": 4.777,
56
- "eval_steps_per_second": 1.194,
57
  "step": 1500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
58
  }
59
  ],
60
  "logging_steps": 500,
61
- "max_steps": 27135,
62
- "num_train_epochs": 5,
63
  "save_steps": 500,
64
- "total_flos": 1.04021020901376e+17,
65
  "trial_name": null,
66
  "trial_params": null
67
  }
 
1
  {
2
+ "best_metric": 0.28169530630111694,
3
+ "best_model_checkpoint": "./checkpoint-lo/checkpoint-1500",
4
+ "epoch": 12.698301245753115,
5
  "eval_steps": 500,
6
+ "global_step": 7000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.91,
13
+ "learning_rate": 1.8790078644888082e-05,
14
+ "loss": 4.5919,
15
  "step": 500
16
  },
17
  {
18
+ "epoch": 0.91,
19
+ "eval_bleu": 32.6249,
20
+ "eval_gen_len": 32.6077,
21
+ "eval_loss": 1.1729092597961426,
22
+ "eval_runtime": 207.0111,
23
+ "eval_samples_per_second": 4.889,
24
+ "eval_steps_per_second": 1.222,
25
  "step": 500
26
  },
27
  {
28
+ "epoch": 1.81,
29
+ "learning_rate": 1.7580157289776165e-05,
30
+ "loss": 0.3216,
31
  "step": 1000
32
  },
33
  {
34
+ "epoch": 1.81,
35
+ "eval_bleu": 33.2994,
36
+ "eval_gen_len": 32.8577,
37
+ "eval_loss": 0.2831147611141205,
38
+ "eval_runtime": 231.4494,
39
+ "eval_samples_per_second": 4.372,
40
+ "eval_steps_per_second": 1.093,
41
  "step": 1000
42
  },
43
  {
44
+ "epoch": 2.72,
45
+ "learning_rate": 1.637023593466425e-05,
46
+ "loss": 0.1325,
47
  "step": 1500
48
  },
49
  {
50
+ "epoch": 2.72,
51
+ "eval_bleu": 33.7596,
52
+ "eval_gen_len": 32.5978,
53
+ "eval_loss": 0.28169530630111694,
54
+ "eval_runtime": 195.7742,
55
+ "eval_samples_per_second": 5.169,
56
+ "eval_steps_per_second": 1.292,
57
  "step": 1500
58
+ },
59
+ {
60
+ "epoch": 3.63,
61
+ "learning_rate": 1.516031457955233e-05,
62
+ "loss": 0.2509,
63
+ "step": 2000
64
+ },
65
+ {
66
+ "epoch": 3.63,
67
+ "eval_bleu": 8.1225,
68
+ "eval_gen_len": 42.5958,
69
+ "eval_loss": 0.576555609703064,
70
+ "eval_runtime": 240.5328,
71
+ "eval_samples_per_second": 4.207,
72
+ "eval_steps_per_second": 1.052,
73
+ "step": 2000
74
+ },
75
+ {
76
+ "epoch": 4.53,
77
+ "learning_rate": 1.3950393224440413e-05,
78
+ "loss": 0.2255,
79
+ "step": 2500
80
+ },
81
+ {
82
+ "epoch": 4.53,
83
+ "eval_bleu": 7.8932,
84
+ "eval_gen_len": 43.3468,
85
+ "eval_loss": 0.5630556344985962,
86
+ "eval_runtime": 238.3063,
87
+ "eval_samples_per_second": 4.247,
88
+ "eval_steps_per_second": 1.062,
89
+ "step": 2500
90
+ },
91
+ {
92
+ "epoch": 5.44,
93
+ "learning_rate": 1.2740471869328494e-05,
94
+ "loss": 0.2123,
95
+ "step": 3000
96
+ },
97
+ {
98
+ "epoch": 5.44,
99
+ "eval_bleu": 7.8523,
100
+ "eval_gen_len": 43.2866,
101
+ "eval_loss": 0.5581173896789551,
102
+ "eval_runtime": 238.527,
103
+ "eval_samples_per_second": 4.243,
104
+ "eval_steps_per_second": 1.061,
105
+ "step": 3000
106
+ },
107
+ {
108
+ "epoch": 6.35,
109
+ "learning_rate": 1.1530550514216576e-05,
110
+ "loss": 0.2061,
111
+ "step": 3500
112
+ },
113
+ {
114
+ "epoch": 6.35,
115
+ "eval_bleu": 7.8532,
116
+ "eval_gen_len": 42.9358,
117
+ "eval_loss": 0.555178701877594,
118
+ "eval_runtime": 243.8895,
119
+ "eval_samples_per_second": 4.149,
120
+ "eval_steps_per_second": 1.037,
121
+ "step": 3500
122
+ },
123
+ {
124
+ "epoch": 7.25,
125
+ "learning_rate": 1.0320629159104658e-05,
126
+ "loss": 0.2001,
127
+ "step": 4000
128
+ },
129
+ {
130
+ "epoch": 7.25,
131
+ "eval_bleu": 7.8643,
132
+ "eval_gen_len": 43.7075,
133
+ "eval_loss": 0.5538426637649536,
134
+ "eval_runtime": 243.9747,
135
+ "eval_samples_per_second": 4.148,
136
+ "eval_steps_per_second": 1.037,
137
+ "step": 4000
138
+ },
139
+ {
140
+ "epoch": 8.16,
141
+ "learning_rate": 9.110707803992742e-06,
142
+ "loss": 0.1935,
143
+ "step": 4500
144
+ },
145
+ {
146
+ "epoch": 8.16,
147
+ "eval_bleu": 7.884,
148
+ "eval_gen_len": 43.6126,
149
+ "eval_loss": 0.5509431958198547,
150
+ "eval_runtime": 253.7386,
151
+ "eval_samples_per_second": 3.988,
152
+ "eval_steps_per_second": 0.997,
153
+ "step": 4500
154
+ },
155
+ {
156
+ "epoch": 9.07,
157
+ "learning_rate": 7.900786448880823e-06,
158
+ "loss": 0.1902,
159
+ "step": 5000
160
+ },
161
+ {
162
+ "epoch": 9.07,
163
+ "eval_bleu": 7.8327,
164
+ "eval_gen_len": 43.2352,
165
+ "eval_loss": 0.5488432049751282,
166
+ "eval_runtime": 241.3846,
167
+ "eval_samples_per_second": 4.192,
168
+ "eval_steps_per_second": 1.048,
169
+ "step": 5000
170
+ },
171
+ {
172
+ "epoch": 9.97,
173
+ "learning_rate": 6.690865093768906e-06,
174
+ "loss": 0.1867,
175
+ "step": 5500
176
+ },
177
+ {
178
+ "epoch": 9.97,
179
+ "eval_bleu": 7.8753,
180
+ "eval_gen_len": 43.75,
181
+ "eval_loss": 0.5456582903862,
182
+ "eval_runtime": 247.5702,
183
+ "eval_samples_per_second": 4.088,
184
+ "eval_steps_per_second": 1.022,
185
+ "step": 5500
186
+ },
187
+ {
188
+ "epoch": 10.89,
189
+ "learning_rate": 5.480943738656987e-06,
190
+ "loss": 0.1295,
191
+ "step": 6000
192
+ },
193
+ {
194
+ "epoch": 10.89,
195
+ "eval_bleu": 33.7401,
196
+ "eval_gen_len": 32.753,
197
+ "eval_loss": 0.2826240658760071,
198
+ "eval_runtime": 200.2264,
199
+ "eval_samples_per_second": 5.054,
200
+ "eval_steps_per_second": 1.264,
201
+ "step": 6000
202
+ },
203
+ {
204
+ "epoch": 11.79,
205
+ "learning_rate": 4.27102238354507e-06,
206
+ "loss": 0.1255,
207
+ "step": 6500
208
+ },
209
+ {
210
+ "epoch": 11.79,
211
+ "eval_bleu": 33.767,
212
+ "eval_gen_len": 32.7213,
213
+ "eval_loss": 0.2822073698043823,
214
+ "eval_runtime": 206.8407,
215
+ "eval_samples_per_second": 4.893,
216
+ "eval_steps_per_second": 1.223,
217
+ "step": 6500
218
+ },
219
+ {
220
+ "epoch": 12.7,
221
+ "learning_rate": 3.061101028433152e-06,
222
+ "loss": 0.1246,
223
+ "step": 7000
224
+ },
225
+ {
226
+ "epoch": 12.7,
227
+ "eval_bleu": 33.7958,
228
+ "eval_gen_len": 32.7233,
229
+ "eval_loss": 0.2822967767715454,
230
+ "eval_runtime": 200.4031,
231
+ "eval_samples_per_second": 5.05,
232
+ "eval_steps_per_second": 1.262,
233
+ "step": 7000
234
  }
235
  ],
236
  "logging_steps": 500,
237
+ "max_steps": 8265,
238
+ "num_train_epochs": 15,
239
  "save_steps": 500,
240
+ "total_flos": 4.85372919048831e+17,
241
  "trial_name": null,
242
  "trial_params": null
243
  }
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bc9d4a097563d213d1df784742d8f330c779d845f3d571ee263c56996903cd83
3
- size 4155
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc546a60bdbe8984b13752ac638a419b4b874e40de5590070d95dd2d21303c3b
3
+ size 4728