neuralwonderland commited on
Commit
4e8af12
·
verified ·
1 Parent(s): 1e4e0a1

Training in progress, step 3600, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b46847f44cde9f0095d19d105743b30fa16d5f27f3d169265933b2aca2d4d57d
3
  size 524363632
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f3c7ad048cfa8ba5bf2471e0c14390ef3c6e859f069f0faecb350f7b03e980f2
3
  size 524363632
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:028359e70e0c07427cba35e7ef72cf3c879bc0eaef6a0dfeb243e8bbb57fe8f8
3
- size 1049049442
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd7b57d28e1737dc995ec2a7a21f59f475558663b14e80aa35265b1856e9d9f4
3
+ size 1049049378
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:93e0c46fd64c96abdb0a034ee95bbd01584ee3cecfb18f900334d0172146c3c9
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:02c6443db0efbefa727caf39e31aabbc8087eb7593e3bd6994e53b2e2b4ef874
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f8ced7fba3c19ab3520f96c3ed94338928b487f6b9e1c6274b7c5d145c844e9
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7aeddfe29841cd7856aa7b799d981a5a586a2282d9b446cc454326fca64812c1
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 1.1979233026504517,
3
- "best_model_checkpoint": "./output/checkpoint-3300",
4
- "epoch": 0.15453527435610304,
5
  "eval_steps": 150,
6
- "global_step": 3450,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2606,6 +2606,119 @@
2606
  "eval_samples_per_second": 9.676,
2607
  "eval_steps_per_second": 9.676,
2608
  "step": 3450
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2609
  }
2610
  ],
2611
  "logging_steps": 10,
@@ -2625,7 +2738,7 @@
2625
  "attributes": {}
2626
  }
2627
  },
2628
- "total_flos": 4.442699374516224e+17,
2629
  "train_batch_size": 4,
2630
  "trial_name": null,
2631
  "trial_params": null
 
1
  {
2
+ "best_metric": 1.1965739727020264,
3
+ "best_model_checkpoint": "./output/checkpoint-3600",
4
+ "epoch": 0.1612541993281075,
5
  "eval_steps": 150,
6
+ "global_step": 3600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2606
  "eval_samples_per_second": 9.676,
2607
  "eval_steps_per_second": 9.676,
2608
  "step": 3450
2609
+ },
2610
+ {
2611
+ "epoch": 0.15498320268757,
2612
+ "grad_norm": 5.320653438568115,
2613
+ "learning_rate": 1.6841363195546162e-06,
2614
+ "loss": 0.996,
2615
+ "step": 3460
2616
+ },
2617
+ {
2618
+ "epoch": 0.15543113101903697,
2619
+ "grad_norm": 4.333999156951904,
2620
+ "learning_rate": 1.6641134382132576e-06,
2621
+ "loss": 1.2536,
2622
+ "step": 3470
2623
+ },
2624
+ {
2625
+ "epoch": 0.15587905935050392,
2626
+ "grad_norm": 6.867399215698242,
2627
+ "learning_rate": 1.6441762994985947e-06,
2628
+ "loss": 1.1461,
2629
+ "step": 3480
2630
+ },
2631
+ {
2632
+ "epoch": 0.1563269876819709,
2633
+ "grad_norm": 3.2110917568206787,
2634
+ "learning_rate": 1.6243257229483141e-06,
2635
+ "loss": 1.1086,
2636
+ "step": 3490
2637
+ },
2638
+ {
2639
+ "epoch": 0.15677491601343785,
2640
+ "grad_norm": 3.345970630645752,
2641
+ "learning_rate": 1.6045625245418648e-06,
2642
+ "loss": 0.9485,
2643
+ "step": 3500
2644
+ },
2645
+ {
2646
+ "epoch": 0.15722284434490483,
2647
+ "grad_norm": 4.890392780303955,
2648
+ "learning_rate": 1.584887516666928e-06,
2649
+ "loss": 1.0968,
2650
+ "step": 3510
2651
+ },
2652
+ {
2653
+ "epoch": 0.15767077267637178,
2654
+ "grad_norm": 5.448171615600586,
2655
+ "learning_rate": 1.565301508086015e-06,
2656
+ "loss": 1.1305,
2657
+ "step": 3520
2658
+ },
2659
+ {
2660
+ "epoch": 0.15811870100783876,
2661
+ "grad_norm": 7.16267728805542,
2662
+ "learning_rate": 1.5458053039032263e-06,
2663
+ "loss": 1.2279,
2664
+ "step": 3530
2665
+ },
2666
+ {
2667
+ "epoch": 0.1585666293393057,
2668
+ "grad_norm": 5.2700018882751465,
2669
+ "learning_rate": 1.5263997055311536e-06,
2670
+ "loss": 1.0474,
2671
+ "step": 3540
2672
+ },
2673
+ {
2674
+ "epoch": 0.1590145576707727,
2675
+ "grad_norm": 5.955024719238281,
2676
+ "learning_rate": 1.5070855106579404e-06,
2677
+ "loss": 1.1283,
2678
+ "step": 3550
2679
+ },
2680
+ {
2681
+ "epoch": 0.15946248600223964,
2682
+ "grad_norm": 2.882784366607666,
2683
+ "learning_rate": 1.4878635132144885e-06,
2684
+ "loss": 0.9112,
2685
+ "step": 3560
2686
+ },
2687
+ {
2688
+ "epoch": 0.15991041433370662,
2689
+ "grad_norm": 4.2263875007629395,
2690
+ "learning_rate": 1.4687345033418258e-06,
2691
+ "loss": 1.1554,
2692
+ "step": 3570
2693
+ },
2694
+ {
2695
+ "epoch": 0.16035834266517357,
2696
+ "grad_norm": 4.622799396514893,
2697
+ "learning_rate": 1.4496992673586262e-06,
2698
+ "loss": 1.3423,
2699
+ "step": 3580
2700
+ },
2701
+ {
2702
+ "epoch": 0.16080627099664055,
2703
+ "grad_norm": 5.2950897216796875,
2704
+ "learning_rate": 1.4307585877288822e-06,
2705
+ "loss": 1.0494,
2706
+ "step": 3590
2707
+ },
2708
+ {
2709
+ "epoch": 0.1612541993281075,
2710
+ "grad_norm": 5.289889335632324,
2711
+ "learning_rate": 1.4119132430297496e-06,
2712
+ "loss": 1.1448,
2713
+ "step": 3600
2714
+ },
2715
+ {
2716
+ "epoch": 0.1612541993281075,
2717
+ "eval_loss": 1.1965739727020264,
2718
+ "eval_runtime": 51.7182,
2719
+ "eval_samples_per_second": 9.668,
2720
+ "eval_steps_per_second": 9.668,
2721
+ "step": 3600
2722
  }
2723
  ],
2724
  "logging_steps": 10,
 
2738
  "attributes": {}
2739
  }
2740
  },
2741
+ "total_flos": 4.637531183321088e+17,
2742
  "train_batch_size": 4,
2743
  "trial_name": null,
2744
  "trial_params": null