iamnguyen committed on
Commit
2f24b5e
·
verified ·
1 Parent(s): 199d9f8

Training in progress, step 48, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a31a542e2dbb2c9cbe799386642c1e0b77dc5941a765201ea34a5511bc04ab63
3
  size 479769104
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1fa6e8dd9233e51c2bb7a726c7c61272bc9817f9ced95cd515b6694d8ba56962
3
  size 479769104
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1ed5cc0419647052fd30648d8c3078b7b3cb31f218b1103a547918cea2f1358f
3
  size 240728084
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c9fde0f59df262cf71a6aabd7d3d62e074982e8aa6dfb99efe152ae0a4de67dc
3
  size 240728084
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85b7a108a3eca8766b6cabea067222bf8fc5ccd85d84371fb8aa5547b4f34b95
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a58743f32ef93b4d40e69256e320c5dd50edad767f0200733beb3941b6d4ca7c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.0020688415093653464,
5
  "eval_steps": 500,
6
- "global_step": 32,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -231,6 +231,118 @@
231
  "learning_rate": 2.0645161290322582e-06,
232
  "loss": 1.5938,
233
  "step": 32
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  }
235
  ],
236
  "logging_steps": 1,
@@ -250,7 +362,7 @@
250
  "attributes": {}
251
  }
252
  },
253
- "total_flos": 2.068879221404467e+16,
254
  "train_batch_size": 2,
255
  "trial_name": null,
256
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.00310326226404802,
5
  "eval_steps": 500,
6
+ "global_step": 48,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
231
  "learning_rate": 2.0645161290322582e-06,
232
  "loss": 1.5938,
233
  "step": 32
234
+ },
235
+ {
236
+ "epoch": 0.0021334928065330134,
237
+ "grad_norm": 5.998967170715332,
238
+ "learning_rate": 2.129032258064516e-06,
239
+ "loss": 1.5716,
240
+ "step": 33
241
+ },
242
+ {
243
+ "epoch": 0.002198144103700681,
244
+ "grad_norm": 6.407791614532471,
245
+ "learning_rate": 2.1935483870967745e-06,
246
+ "loss": 1.5848,
247
+ "step": 34
248
+ },
249
+ {
250
+ "epoch": 0.002262795400868348,
251
+ "grad_norm": 6.110191822052002,
252
+ "learning_rate": 2.2580645161290324e-06,
253
+ "loss": 1.643,
254
+ "step": 35
255
+ },
256
+ {
257
+ "epoch": 0.0023274466980360148,
258
+ "grad_norm": 6.058048248291016,
259
+ "learning_rate": 2.3225806451612907e-06,
260
+ "loss": 1.5242,
261
+ "step": 36
262
+ },
263
+ {
264
+ "epoch": 0.0023920979952036818,
265
+ "grad_norm": 7.348267078399658,
266
+ "learning_rate": 2.3870967741935486e-06,
267
+ "loss": 1.5112,
268
+ "step": 37
269
+ },
270
+ {
271
+ "epoch": 0.002456749292371349,
272
+ "grad_norm": 6.676770210266113,
273
+ "learning_rate": 2.4516129032258066e-06,
274
+ "loss": 1.5184,
275
+ "step": 38
276
+ },
277
+ {
278
+ "epoch": 0.002521400589539016,
279
+ "grad_norm": 9.655611038208008,
280
+ "learning_rate": 2.5161290322580645e-06,
281
+ "loss": 1.4867,
282
+ "step": 39
283
+ },
284
+ {
285
+ "epoch": 0.002586051886706683,
286
+ "grad_norm": 10.357527732849121,
287
+ "learning_rate": 2.580645161290323e-06,
288
+ "loss": 1.5255,
289
+ "step": 40
290
+ },
291
+ {
292
+ "epoch": 0.00265070318387435,
293
+ "grad_norm": 6.523240566253662,
294
+ "learning_rate": 2.645161290322581e-06,
295
+ "loss": 1.5326,
296
+ "step": 41
297
+ },
298
+ {
299
+ "epoch": 0.002715354481042017,
300
+ "grad_norm": 5.7459282875061035,
301
+ "learning_rate": 2.709677419354839e-06,
302
+ "loss": 1.5533,
303
+ "step": 42
304
+ },
305
+ {
306
+ "epoch": 0.0027800057782096845,
307
+ "grad_norm": 5.776258945465088,
308
+ "learning_rate": 2.774193548387097e-06,
309
+ "loss": 1.5261,
310
+ "step": 43
311
+ },
312
+ {
313
+ "epoch": 0.0028446570753773515,
314
+ "grad_norm": 7.176516056060791,
315
+ "learning_rate": 2.8387096774193553e-06,
316
+ "loss": 1.4434,
317
+ "step": 44
318
+ },
319
+ {
320
+ "epoch": 0.0029093083725450185,
321
+ "grad_norm": 6.083931922912598,
322
+ "learning_rate": 2.903225806451613e-06,
323
+ "loss": 1.5127,
324
+ "step": 45
325
+ },
326
+ {
327
+ "epoch": 0.0029739596697126855,
328
+ "grad_norm": 8.212278366088867,
329
+ "learning_rate": 2.967741935483871e-06,
330
+ "loss": 1.569,
331
+ "step": 46
332
+ },
333
+ {
334
+ "epoch": 0.003038610966880353,
335
+ "grad_norm": 6.138173580169678,
336
+ "learning_rate": 3.0322580645161295e-06,
337
+ "loss": 1.4985,
338
+ "step": 47
339
+ },
340
+ {
341
+ "epoch": 0.00310326226404802,
342
+ "grad_norm": 9.2051362991333,
343
+ "learning_rate": 3.0967741935483874e-06,
344
+ "loss": 1.4528,
345
+ "step": 48
346
  }
347
  ],
348
  "logging_steps": 1,
 
362
  "attributes": {}
363
  }
364
  },
365
+ "total_flos": 3.1121051098669056e+16,
366
  "train_batch_size": 2,
367
  "trial_name": null,
368
  "trial_params": null