diagonalge commited on
Commit
996a276
·
verified ·
1 Parent(s): 4859444

Training in progress, step 80, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:85905a4efa045b0030db51c0398a6f359f039aba18cec9509006d5d5b8af8d05
3
  size 101752088
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3b120c235acc603d7e0a88df6eb116c05a31d43ac85e97d14eca60636aa4e9e1
3
  size 101752088
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1991c9eb89d48a8ffc4f37213c19848b47dcef3c2b8314121e579e8434fb0c91
3
  size 52046596
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c28a74ee961f8b24fb2735619f4f437d34485673c89b57deafa93df5086d70b
3
  size 52046596
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:65f78a9649ee8cc6d0276caf10c7c8bafae430ddf310a187a7b4c38627fd2b56
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2cd11dbeb5727fc345d367ecbecf218eaa06a9e3b7752506d05b1cc38c858f04
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f98a8feef34550913a6c17e6d111551876ee5198dbf8b76141d29b9c822b726
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49b8a1dbbf2c2a0b7fde326d57c34bd6c5e5d17e0aaf8b19016c1f721c049db1
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.008811127194914722,
5
  "eval_steps": 25,
6
- "global_step": 70,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -521,6 +521,84 @@
521
  "learning_rate": 5.000000000000002e-05,
522
  "loss": 0.0042,
523
  "step": 70
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
524
  }
525
  ],
526
  "logging_steps": 1,
@@ -540,7 +618,7 @@
540
  "attributes": {}
541
  }
542
  },
543
- "total_flos": 4.60976881139712e+16,
544
  "train_batch_size": 2,
545
  "trial_name": null,
546
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.01006985965133111,
5
  "eval_steps": 25,
6
+ "global_step": 80,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
521
  "learning_rate": 5.000000000000002e-05,
522
  "loss": 0.0042,
523
  "step": 70
524
+ },
525
+ {
526
+ "epoch": 0.00893700044055636,
527
+ "grad_norm": 0.05394396558403969,
528
+ "learning_rate": 4.700807357667952e-05,
529
+ "loss": 0.0018,
530
+ "step": 71
531
+ },
532
+ {
533
+ "epoch": 0.009062873686197999,
534
+ "grad_norm": 2.811530113220215,
535
+ "learning_rate": 4.4080709652925336e-05,
536
+ "loss": 0.2694,
537
+ "step": 72
538
+ },
539
+ {
540
+ "epoch": 0.009188746931839638,
541
+ "grad_norm": 0.06051735207438469,
542
+ "learning_rate": 4.12214747707527e-05,
543
+ "loss": 0.0016,
544
+ "step": 73
545
+ },
546
+ {
547
+ "epoch": 0.009314620177481277,
548
+ "grad_norm": 0.596136748790741,
549
+ "learning_rate": 3.843385246743417e-05,
550
+ "loss": 0.0087,
551
+ "step": 74
552
+ },
553
+ {
554
+ "epoch": 0.009440493423122915,
555
+ "grad_norm": 0.11233700811862946,
556
+ "learning_rate": 3.5721239031346066e-05,
557
+ "loss": 0.0045,
558
+ "step": 75
559
+ },
560
+ {
561
+ "epoch": 0.009440493423122915,
562
+ "eval_loss": NaN,
563
+ "eval_runtime": 3551.4208,
564
+ "eval_samples_per_second": 0.942,
565
+ "eval_steps_per_second": 0.471,
566
+ "step": 75
567
+ },
568
+ {
569
+ "epoch": 0.009566366668764554,
570
+ "grad_norm": 0.09344177693128586,
571
+ "learning_rate": 3.308693936411421e-05,
572
+ "loss": 0.0053,
573
+ "step": 76
574
+ },
575
+ {
576
+ "epoch": 0.009692239914406193,
577
+ "grad_norm": 0.017914390191435814,
578
+ "learning_rate": 3.053416295410026e-05,
579
+ "loss": 0.0009,
580
+ "step": 77
581
+ },
582
+ {
583
+ "epoch": 0.009818113160047832,
584
+ "grad_norm": 0.14340530335903168,
585
+ "learning_rate": 2.8066019966134904e-05,
586
+ "loss": 0.007,
587
+ "step": 78
588
+ },
589
+ {
590
+ "epoch": 0.00994398640568947,
591
+ "grad_norm": 2.4637436866760254,
592
+ "learning_rate": 2.5685517452260567e-05,
593
+ "loss": 0.2807,
594
+ "step": 79
595
+ },
596
+ {
597
+ "epoch": 0.01006985965133111,
598
+ "grad_norm": 0.016049662604928017,
599
+ "learning_rate": 2.339555568810221e-05,
600
+ "loss": 0.0003,
601
+ "step": 80
602
  }
603
  ],
604
  "logging_steps": 1,
 
618
  "attributes": {}
619
  }
620
  },
621
+ "total_flos": 5.26830721302528e+16,
622
  "train_batch_size": 2,
623
  "trial_name": null,
624
  "trial_params": null