kooff11 commited on
Commit
afa5742
·
verified ·
1 Parent(s): a52396a

Training in progress, step 39, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d8e90f8b2d9efe89ecf6005b90cee9f2ff75ee0ae4bad05048bf3b5ed194c681
3
  size 167832240
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:50eaa409ee5f0a93f2c540fbca54989c5aefa21b1273c5d5b1d886ba09a20469
3
  size 167832240
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f41172cfd16f5eea55c82a17063631317baf49c5f4d5ecc40f7960c2172a8ef5
3
  size 85723284
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef68d332673955da9d5dae7c171321f7a76837999c66b0e34d0f03757d7a5890
3
  size 85723284
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e97b7009911df3c6dbe7894b6ee9da598c21826b98bea0a9fff09e958541373f
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:84783094152d26fbafe146a53398686233c3cc9dfb087efb955ba19f4d58f158
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e478bf29fddd2bf8f7498cec7a413de2c2afd2062f64a9f38a4ef3c5a20e6a2d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aae2c39cb89433419e485dd995b7c5858a698c39b6628327b4757d3c15aab5bb
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.06863553868998515,
5
  "eval_steps": 13,
6
- "global_step": 26,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -213,6 +213,105 @@
213
  "eval_samples_per_second": 5.568,
214
  "eval_steps_per_second": 2.786,
215
  "step": 26
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
216
  }
217
  ],
218
  "logging_steps": 1,
@@ -232,7 +331,7 @@
232
  "attributes": {}
233
  }
234
  },
235
- "total_flos": 3.086251099611464e+17,
236
  "train_batch_size": 2,
237
  "trial_name": null,
238
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.10295330803497772,
5
  "eval_steps": 13,
6
+ "global_step": 39,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
213
  "eval_samples_per_second": 5.568,
214
  "eval_steps_per_second": 2.786,
215
  "step": 26
216
+ },
217
+ {
218
+ "epoch": 0.07127536710113842,
219
+ "grad_norm": 3.462208769633435e-05,
220
+ "learning_rate": 0.0001401895306154785,
221
+ "loss": 0.0,
222
+ "step": 27
223
+ },
224
+ {
225
+ "epoch": 0.0739151955122917,
226
+ "grad_norm": 2.7388192393118516e-05,
227
+ "learning_rate": 0.00013042107116699228,
228
+ "loss": 0.0,
229
+ "step": 28
230
+ },
231
+ {
232
+ "epoch": 0.07655502392344497,
233
+ "grad_norm": 3.9361602830467746e-05,
234
+ "learning_rate": 0.00012073645169758076,
235
+ "loss": 0.0,
236
+ "step": 29
237
+ },
238
+ {
239
+ "epoch": 0.07919485233459825,
240
+ "grad_norm": 2.9102855478413403e-05,
241
+ "learning_rate": 0.00011117714323462186,
242
+ "loss": 0.0,
243
+ "step": 30
244
+ },
245
+ {
246
+ "epoch": 0.08183468074575152,
247
+ "grad_norm": 2.171610321966e-05,
248
+ "learning_rate": 0.00010178408020452579,
249
+ "loss": 0.0,
250
+ "step": 31
251
+ },
252
+ {
253
+ "epoch": 0.0844745091569048,
254
+ "grad_norm": 2.040547587967012e-05,
255
+ "learning_rate": 9.259748514523653e-05,
256
+ "loss": 0.0,
257
+ "step": 32
258
+ },
259
+ {
260
+ "epoch": 0.08711433756805807,
261
+ "grad_norm": 1.634558975638356e-05,
262
+ "learning_rate": 8.365669646714983e-05,
263
+ "loss": 0.0,
264
+ "step": 33
265
+ },
266
+ {
267
+ "epoch": 0.08975416597921135,
268
+ "grad_norm": 2.2022310076863505e-05,
269
+ "learning_rate": 7.500000000000002e-05,
270
+ "loss": 0.0,
271
+ "step": 34
272
+ },
273
+ {
274
+ "epoch": 0.09239399439036462,
275
+ "grad_norm": 1.6614567357464693e-05,
276
+ "learning_rate": 6.66644650470597e-05,
277
+ "loss": 0.0,
278
+ "step": 35
279
+ },
280
+ {
281
+ "epoch": 0.0950338228015179,
282
+ "grad_norm": 1.5442792573594488e-05,
283
+ "learning_rate": 5.8685785648691894e-05,
284
+ "loss": 0.0,
285
+ "step": 36
286
+ },
287
+ {
288
+ "epoch": 0.09767365121267117,
289
+ "grad_norm": 1.3897730241296813e-05,
290
+ "learning_rate": 5.109812773498967e-05,
291
+ "loss": 0.0,
292
+ "step": 37
293
+ },
294
+ {
295
+ "epoch": 0.10031347962382445,
296
+ "grad_norm": 1.476151192036923e-05,
297
+ "learning_rate": 4.3933982822017876e-05,
298
+ "loss": 0.0,
299
+ "step": 38
300
+ },
301
+ {
302
+ "epoch": 0.10295330803497772,
303
+ "grad_norm": 1.7724401914165355e-05,
304
+ "learning_rate": 3.72240288781534e-05,
305
+ "loss": 0.0,
306
+ "step": 39
307
+ },
308
+ {
309
+ "epoch": 0.10295330803497772,
310
+ "eval_loss": 1.1696874935296364e-07,
311
+ "eval_runtime": 229.2689,
312
+ "eval_samples_per_second": 5.57,
313
+ "eval_steps_per_second": 2.787,
314
+ "step": 39
315
  }
316
  ],
317
  "logging_steps": 1,
 
331
  "attributes": {}
332
  }
333
  },
334
+ "total_flos": 4.6293766494171955e+17,
335
  "train_batch_size": 2,
336
  "trial_name": null,
337
  "trial_params": null