neuralwonderland committed
Commit 9f8d618 · verified · 1 Parent(s): ed3dc88

Training in progress, step 3750, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:81a67abcc251ee8774cf1f939d25bdb948c3090e27c09b7dd067fcc374015634
+ oid sha256:3974b1bb4f346d15b63d67a72929b7007ca3f86b53199a1fd7e5e099e9c14afa
  size 69527352
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:dd4d64756ff00ff120e2d59178ccf226833b52a51e47002c1ca458e16a29bf2b
+ oid sha256:474b61e6b7c602c238188085fc11f74c10ee7fc4e1df53ec4af036170a9079c5
  size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:100af15346f38893ee7a78a6ec3be82fa1fb054726f2de1c68aa0d2ed7d8c61c
+ oid sha256:ba2a26069fc52ae057d8721b042adddf9f086526ef980fe3ff35c4c5a9d329e8
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a1545736a5eb621f68b649a1ca6120c7dd4b230823d94d8efa1cacfea47cc556
+ oid sha256:607202c5623a0d7c32f1c23cdb843f56835e006025f9cf66875098e00a1be2b7
  size 1256
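
The four entries above are Git LFS pointer updates: each pointer stores the LFS spec version, the SHA-256 object ID of the underlying blob, and its size in bytes, and only the oid line changes when a checkpoint file is overwritten in place. As a rough illustration (not part of this commit), a script along the following lines can check that a downloaded blob matches its pointer; the local path is an assumed example, and the oid/size values are copied from the adapter_model.safetensors pointer above.

import hashlib
import os

def sha256_of_file(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file so large checkpoint blobs need not fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local path; oid/size taken from the pointer shown in this commit.
blob_path = "last-checkpoint/adapter_model.safetensors"
expected_oid = "3974b1bb4f346d15b63d67a72929b7007ca3f86b53199a1fd7e5e099e9c14afa"
expected_size = 69527352

assert os.path.getsize(blob_path) == expected_size, "size mismatch"
assert sha256_of_file(blob_path) == expected_oid, "sha256 mismatch"
print("pointer and blob agree")
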
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 0.7831114530563354,
- "best_model_checkpoint": "./output/checkpoint-3600",
- "epoch": 0.44753853804077576,
+ "best_metric": 0.7807769775390625,
+ "best_model_checkpoint": "./output/checkpoint-3750",
+ "epoch": 0.46618597712580806,
  "eval_steps": 150,
- "global_step": 3600,
+ "global_step": 3750,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2719,6 +2719,119 @@
  "eval_samples_per_second": 8.968,
  "eval_steps_per_second": 8.968,
  "step": 3600
+ },
+ {
+ "epoch": 0.44878170064644457,
+ "grad_norm": 1.402942419052124,
+ "learning_rate": 2.3219400131992273e-05,
+ "loss": 0.6078,
+ "step": 3610
+ },
+ {
+ "epoch": 0.4500248632521134,
+ "grad_norm": 0.9987258315086365,
+ "learning_rate": 2.2908527551764404e-05,
+ "loss": 0.5269,
+ "step": 3620
+ },
+ {
+ "epoch": 0.4512680258577822,
+ "grad_norm": 1.655179500579834,
+ "learning_rate": 2.259928242189966e-05,
+ "loss": 0.5955,
+ "step": 3630
+ },
+ {
+ "epoch": 0.452511188463451,
+ "grad_norm": 1.3401436805725098,
+ "learning_rate": 2.2291677454254136e-05,
+ "loss": 0.6061,
+ "step": 3640
+ },
+ {
+ "epoch": 0.4537543510691198,
+ "grad_norm": 1.766615629196167,
+ "learning_rate": 2.1985725293263237e-05,
+ "loss": 0.6185,
+ "step": 3650
+ },
+ {
+ "epoch": 0.4549975136747887,
+ "grad_norm": 1.7541744709014893,
+ "learning_rate": 2.1681438515421953e-05,
+ "loss": 0.5724,
+ "step": 3660
+ },
+ {
+ "epoch": 0.4562406762804575,
+ "grad_norm": 1.9028109312057495,
+ "learning_rate": 2.1378829628767965e-05,
+ "loss": 0.5688,
+ "step": 3670
+ },
+ {
+ "epoch": 0.4574838388861263,
+ "grad_norm": 1.54623281955719,
+ "learning_rate": 2.1077911072367317e-05,
+ "loss": 0.6044,
+ "step": 3680
+ },
+ {
+ "epoch": 0.4587270014917951,
+ "grad_norm": 1.4844456911087036,
+ "learning_rate": 2.077869521580325e-05,
+ "loss": 0.5635,
+ "step": 3690
+ },
+ {
+ "epoch": 0.45997016409746394,
+ "grad_norm": 1.464686632156372,
+ "learning_rate": 2.0481194358667695e-05,
+ "loss": 0.5237,
+ "step": 3700
+ },
+ {
+ "epoch": 0.46121332670313275,
+ "grad_norm": 1.3379572629928589,
+ "learning_rate": 2.018542073005567e-05,
+ "loss": 0.5913,
+ "step": 3710
+ },
+ {
+ "epoch": 0.46245648930880157,
+ "grad_norm": 1.292743444442749,
+ "learning_rate": 1.9891386488062538e-05,
+ "loss": 0.5878,
+ "step": 3720
+ },
+ {
+ "epoch": 0.46369965191447043,
+ "grad_norm": 1.7692592144012451,
+ "learning_rate": 1.959910371928436e-05,
+ "loss": 0.5772,
+ "step": 3730
+ },
+ {
+ "epoch": 0.46494281452013925,
+ "grad_norm": 1.5741891860961914,
+ "learning_rate": 1.930858443832096e-05,
+ "loss": 0.5899,
+ "step": 3740
+ },
+ {
+ "epoch": 0.46618597712580806,
+ "grad_norm": 2.228027105331421,
+ "learning_rate": 1.90198405872821e-05,
+ "loss": 0.6182,
+ "step": 3750
+ },
+ {
+ "epoch": 0.46618597712580806,
+ "eval_loss": 0.7807769775390625,
+ "eval_runtime": 54.8116,
+ "eval_samples_per_second": 9.122,
+ "eval_steps_per_second": 9.122,
+ "step": 3750
  }
  ],
  "logging_steps": 10,
@@ -2738,7 +2851,7 @@
  "attributes": {}
  }
  },
- "total_flos": 1.25205215952384e+17,
+ "total_flos": 1.3044092846143488e+17,
  "train_batch_size": 16,
  "trial_name": null,
  "trial_params": null