diagonalge committed on
Commit
bae8cc1
·
verified ·
1 Parent(s): 573c00c

Training in progress, step 70, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:58b2166da01d55a046bf02b2b43ba0d22031515bba03e5d013b617cd11c38a96
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:85905a4efa045b0030db51c0398a6f359f039aba18cec9509006d5d5b8af8d05
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b2a6dead6d577cc2fdab009dff4a17fbd4eeb91af9b65d3440cf7f72177a3b37
3
  size 52046596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1991c9eb89d48a8ffc4f37213c19848b47dcef3c2b8314121e579e8434fb0c91
3
  size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b98ac7b1e9243bd4e083ddd1c46b98c631cbc03a3912122b4b6336d976fc04e1
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:65f78a9649ee8cc6d0276caf10c7c8bafae430ddf310a187a7b4c38627fd2b56
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bde2757391e94ff5103cea79868bee6f1ccc90f20c64e82cf9933fa7b5accd0c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4f98a8feef34550913a6c17e6d111551876ee5198dbf8b76141d29b9c822b726
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.007552394738498332,
5
  "eval_steps": 25,
6
- "global_step": 60,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -451,6 +451,76 @@
451
  "learning_rate": 8.263518223330697e-05,
452
  "loss": 0.0118,
453
  "step": 60
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
454
  }
455
  ],
456
  "logging_steps": 1,
@@ -470,7 +540,7 @@
470
  "attributes": {}
471
  }
472
  },
473
- "total_flos": 3.95123040976896e+16,
474
  "train_batch_size": 2,
475
  "trial_name": null,
476
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.008811127194914722,
5
  "eval_steps": 25,
6
+ "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
451
  "learning_rate": 8.263518223330697e-05,
452
  "loss": 0.0118,
453
  "step": 60
454
+ },
455
+ {
456
+ "epoch": 0.007678267984139971,
457
+ "grad_norm": 0.029303928837180138,
458
+ "learning_rate": 7.920883091822408e-05,
459
+ "loss": 0.0006,
460
+ "step": 61
461
+ },
462
+ {
463
+ "epoch": 0.00780414122978161,
464
+ "grad_norm": 0.16770148277282715,
465
+ "learning_rate": 7.580781044003324e-05,
466
+ "loss": 0.0068,
467
+ "step": 62
468
+ },
469
+ {
470
+ "epoch": 0.007930014475423248,
471
+ "grad_norm": 0.1437792032957077,
472
+ "learning_rate": 7.243626441830009e-05,
473
+ "loss": 0.0024,
474
+ "step": 63
475
+ },
476
+ {
477
+ "epoch": 0.008055887721064888,
478
+ "grad_norm": 0.11593683063983917,
479
+ "learning_rate": 6.909830056250527e-05,
480
+ "loss": 0.0146,
481
+ "step": 64
482
+ },
483
+ {
484
+ "epoch": 0.008181760966706527,
485
+ "grad_norm": 1.9799082279205322,
486
+ "learning_rate": 6.579798566743314e-05,
487
+ "loss": 0.0358,
488
+ "step": 65
489
+ },
490
+ {
491
+ "epoch": 0.008307634212348165,
492
+ "grad_norm": 0.15605556964874268,
493
+ "learning_rate": 6.25393406584088e-05,
494
+ "loss": 0.0031,
495
+ "step": 66
496
+ },
497
+ {
498
+ "epoch": 0.008433507457989804,
499
+ "grad_norm": 0.03782504051923752,
500
+ "learning_rate": 5.9326335692419995e-05,
501
+ "loss": 0.0008,
502
+ "step": 67
503
+ },
504
+ {
505
+ "epoch": 0.008559380703631443,
506
+ "grad_norm": 0.013293488882482052,
507
+ "learning_rate": 5.616288532109225e-05,
508
+ "loss": 0.0006,
509
+ "step": 68
510
+ },
511
+ {
512
+ "epoch": 0.008685253949273082,
513
+ "grad_norm": 2.1244702339172363,
514
+ "learning_rate": 5.305284372141095e-05,
515
+ "loss": 0.3516,
516
+ "step": 69
517
+ },
518
+ {
519
+ "epoch": 0.008811127194914722,
520
+ "grad_norm": 0.1244107261300087,
521
+ "learning_rate": 5.000000000000002e-05,
522
+ "loss": 0.0042,
523
+ "step": 70
524
  }
525
  ],
526
  "logging_steps": 1,
 
540
  "attributes": {}
541
  }
542
  },
543
+ "total_flos": 4.60976881139712e+16,
544
  "train_batch_size": 2,
545
  "trial_name": null,
546
  "trial_params": null