leixa committed on
Commit
3955db9
·
verified ·
1 Parent(s): f03e540

Training in progress, step 252, checkpoint

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d48f02537ae2e81d3c8124f997d691ee108d3dc0b12e1ad53b612c4e57ddd73e
3
  size 341314196
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b5a7ed7529369a516560ebe1c26cfdba51e8269d23e3b7413f58c09ab2069175
3
  size 341314196
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:492da289a127a48721163632337bf0651c18833a78d45796b5cf96803839e7ea
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:31fef74ecc274c1e583f98d996eb610b459d5a80f6a2f818f0df2c8347af678c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4bd95a1db0e917ddf11b12a343f06e907fcec4b81104002e2471b4778587b465
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ee19ddad9c4c375a1de2d74fb4c1cf5e15d36c1ed47a2cb80f7cb0fbacb3b29e
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.011161159166101966,
5
  "eval_steps": 42,
6
- "global_step": 210,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -545,6 +545,112 @@
545
  "eval_samples_per_second": 13.376,
546
  "eval_steps_per_second": 1.672,
547
  "step": 210
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
548
  }
549
  ],
550
  "logging_steps": 3,
@@ -564,7 +670,7 @@
564
  "attributes": {}
565
  }
566
  },
567
- "total_flos": 3.198048786941215e+17,
568
  "train_batch_size": 8,
569
  "trial_name": null,
570
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.013393390999322359,
5
  "eval_steps": 42,
6
+ "global_step": 252,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
545
  "eval_samples_per_second": 13.376,
546
  "eval_steps_per_second": 1.672,
547
  "step": 210
548
+ },
549
+ {
550
+ "epoch": 0.01132060429704628,
551
+ "grad_norm": NaN,
552
+ "learning_rate": 6.330184227833376e-05,
553
+ "loss": 0.0,
554
+ "step": 213
555
+ },
556
+ {
557
+ "epoch": 0.011480049427990593,
558
+ "grad_norm": NaN,
559
+ "learning_rate": 6.237238428024572e-05,
560
+ "loss": 0.0,
561
+ "step": 216
562
+ },
563
+ {
564
+ "epoch": 0.011639494558934907,
565
+ "grad_norm": NaN,
566
+ "learning_rate": 6.143834918526527e-05,
567
+ "loss": 0.0,
568
+ "step": 219
569
+ },
570
+ {
571
+ "epoch": 0.011798939689879221,
572
+ "grad_norm": NaN,
573
+ "learning_rate": 6.0500082534642464e-05,
574
+ "loss": 0.0,
575
+ "step": 222
576
+ },
577
+ {
578
+ "epoch": 0.011958384820823535,
579
+ "grad_norm": NaN,
580
+ "learning_rate": 5.955793143506863e-05,
581
+ "loss": 0.0,
582
+ "step": 225
583
+ },
584
+ {
585
+ "epoch": 0.012117829951767849,
586
+ "grad_norm": NaN,
587
+ "learning_rate": 5.861224443026595e-05,
588
+ "loss": 0.0,
589
+ "step": 228
590
+ },
591
+ {
592
+ "epoch": 0.012277275082712162,
593
+ "grad_norm": NaN,
594
+ "learning_rate": 5.766337137204579e-05,
595
+ "loss": 0.0,
596
+ "step": 231
597
+ },
598
+ {
599
+ "epoch": 0.012436720213656476,
600
+ "grad_norm": NaN,
601
+ "learning_rate": 5.6711663290882776e-05,
602
+ "loss": 0.0,
603
+ "step": 234
604
+ },
605
+ {
606
+ "epoch": 0.01259616534460079,
607
+ "grad_norm": NaN,
608
+ "learning_rate": 5.575747226605298e-05,
609
+ "loss": 0.0,
610
+ "step": 237
611
+ },
612
+ {
613
+ "epoch": 0.012755610475545104,
614
+ "grad_norm": NaN,
615
+ "learning_rate": 5.480115129538409e-05,
616
+ "loss": 0.0,
617
+ "step": 240
618
+ },
619
+ {
620
+ "epoch": 0.012915055606489418,
621
+ "grad_norm": NaN,
622
+ "learning_rate": 5.384305416466584e-05,
623
+ "loss": 0.0,
624
+ "step": 243
625
+ },
626
+ {
627
+ "epoch": 0.013074500737433731,
628
+ "grad_norm": NaN,
629
+ "learning_rate": 5.288353531676873e-05,
630
+ "loss": 0.0,
631
+ "step": 246
632
+ },
633
+ {
634
+ "epoch": 0.013233945868378045,
635
+ "grad_norm": NaN,
636
+ "learning_rate": 5.192294972051992e-05,
637
+ "loss": 0.0,
638
+ "step": 249
639
+ },
640
+ {
641
+ "epoch": 0.013393390999322359,
642
+ "grad_norm": NaN,
643
+ "learning_rate": 5.0961652739384356e-05,
644
+ "loss": 0.0,
645
+ "step": 252
646
+ },
647
+ {
648
+ "epoch": 0.013393390999322359,
649
+ "eval_loss": NaN,
650
+ "eval_runtime": 2368.9779,
651
+ "eval_samples_per_second": 13.377,
652
+ "eval_steps_per_second": 1.672,
653
+ "step": 252
654
  }
655
  ],
656
  "logging_steps": 3,
 
670
  "attributes": {}
671
  }
672
  },
673
+ "total_flos": 3.8391670579082035e+17,
674
  "train_batch_size": 8,
675
  "trial_name": null,
676
  "trial_params": null