diagonalge commited on
Commit
72b7008
·
verified ·
1 Parent(s): 8fcd997

Training in progress, step 60, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ed1b6f73d502882db71481cb2bd9301ca90f102a334c47d83a2fba5bd0da6457
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:58b2166da01d55a046bf02b2b43ba0d22031515bba03e5d013b617cd11c38a96
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:374bbc3237cc7e7bd30285a95b75e5e70f7faf581aba5922c65f250ce697560d
3
  size 52046596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2a6dead6d577cc2fdab009dff4a17fbd4eeb91af9b65d3440cf7f72177a3b37
3
  size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1546821c54d8d437f890166fec8f3e617edf24a1bd79d91cb3ed48225d7bfa93
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b98ac7b1e9243bd4e083ddd1c46b98c631cbc03a3912122b4b6336d976fc04e1
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b8eccac390378b22aeb148c260b00bec01d948946d8363d5282899af673e0e86
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bde2757391e94ff5103cea79868bee6f1ccc90f20c64e82cf9933fa7b5accd0c
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.006293662282081943,
5
  "eval_steps": 25,
6
- "global_step": 50,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -381,6 +381,76 @@
381
  "eval_samples_per_second": 0.951,
382
  "eval_steps_per_second": 0.475,
383
  "step": 50
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
384
  }
385
  ],
386
  "logging_steps": 1,
@@ -400,7 +470,7 @@
400
  "attributes": {}
401
  }
402
  },
403
- "total_flos": 3.2926920081408e+16,
404
  "train_batch_size": 2,
405
  "trial_name": null,
406
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.007552394738498332,
5
  "eval_steps": 25,
6
+ "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
381
  "eval_samples_per_second": 0.951,
382
  "eval_steps_per_second": 0.475,
383
  "step": 50
384
+ },
385
+ {
386
+ "epoch": 0.006419535527723582,
387
+ "grad_norm": 0.006420983001589775,
388
+ "learning_rate": 0.00011391731009600654,
389
+ "loss": 0.0011,
390
+ "step": 51
391
+ },
392
+ {
393
+ "epoch": 0.006545408773365222,
394
+ "grad_norm": 0.5650655627250671,
395
+ "learning_rate": 0.00011045284632676536,
396
+ "loss": 0.0442,
397
+ "step": 52
398
+ },
399
+ {
400
+ "epoch": 0.00667128201900686,
401
+ "grad_norm": 0.09890392422676086,
402
+ "learning_rate": 0.00010697564737441252,
403
+ "loss": 0.0048,
404
+ "step": 53
405
+ },
406
+ {
407
+ "epoch": 0.006797155264648499,
408
+ "grad_norm": 0.05633799359202385,
409
+ "learning_rate": 0.00010348994967025012,
410
+ "loss": 0.002,
411
+ "step": 54
412
+ },
413
+ {
414
+ "epoch": 0.006923028510290138,
415
+ "grad_norm": 3.1540989875793457,
416
+ "learning_rate": 0.0001,
417
+ "loss": 1.6562,
418
+ "step": 55
419
+ },
420
+ {
421
+ "epoch": 0.007048901755931777,
422
+ "grad_norm": 5.062186241149902,
423
+ "learning_rate": 9.651005032974994e-05,
424
+ "loss": 0.2802,
425
+ "step": 56
426
+ },
427
+ {
428
+ "epoch": 0.007174775001573415,
429
+ "grad_norm": 1.7618149518966675,
430
+ "learning_rate": 9.302435262558747e-05,
431
+ "loss": 0.1865,
432
+ "step": 57
433
+ },
434
+ {
435
+ "epoch": 0.007300648247215055,
436
+ "grad_norm": 0.37688034772872925,
437
+ "learning_rate": 8.954715367323468e-05,
438
+ "loss": 0.0144,
439
+ "step": 58
440
+ },
441
+ {
442
+ "epoch": 0.007426521492856693,
443
+ "grad_norm": 0.11028943210840225,
444
+ "learning_rate": 8.608268990399349e-05,
445
+ "loss": 0.0063,
446
+ "step": 59
447
+ },
448
+ {
449
+ "epoch": 0.007552394738498332,
450
+ "grad_norm": 0.5467414855957031,
451
+ "learning_rate": 8.263518223330697e-05,
452
+ "loss": 0.0118,
453
+ "step": 60
454
  }
455
  ],
456
  "logging_steps": 1,
 
470
  "attributes": {}
471
  }
472
  },
473
+ "total_flos": 3.95123040976896e+16,
474
  "train_batch_size": 2,
475
  "trial_name": null,
476
  "trial_params": null