diaenra committed on
Commit 95aeca2 · verified · 1 Parent(s): a0cdc9f

Training in progress, step 1912, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:696b61b107c7830e174c74fc2c1e98a8ac2eb60432a2780ac20743074c60bfef
+ oid sha256:2e893d538403d4ac222e2baaf746a33535ee8031c07cf1939cc3355ea15106a0
  size 2503003904
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:183ba56959c3f98243460a5cf43908fb35573ed3c49e7e641501af88f84532ad
+ oid sha256:dbf245997dbc83cd89bcfb5067dfa742724b5f13ff1993cd0ad6d3d60a4c987a
  size 5006244836
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a9aa8a834991d1a099287d763aaf65662c5909034fe39dff582c0258f97c0051
+ oid sha256:12f09fa1a152c2febaa1b0be3c98d7abd70a22c5965d994af5b7173cc3e6ff7f
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:e526b4ee743444ec6815869e1af216b1753a6adb990a1534692135db761d6817
+ oid sha256:c767cedc54b733779ba8a20f635d848598fd89e5cfee0706f6c63df8c1e6b2d8
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
  "best_metric": null,
  "best_model_checkpoint": null,
- "epoch": 0.8553169734151329,
+ "epoch": 0.9775051124744376,
  "eval_steps": 500,
- "global_step": 1673,
+ "global_step": 1912,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -11718,6 +11718,1679 @@
  "learning_rate": 5.6277671951738716e-06,
  "loss": 0.7867,
  "step": 1673
11721
+ },
11722
+ {
11723
+ "epoch": 0.8558282208588958,
11724
+ "grad_norm": 3.041142225265503,
11725
+ "learning_rate": 5.588822025878476e-06,
11726
+ "loss": 0.7922,
11727
+ "step": 1674
11728
+ },
11729
+ {
11730
+ "epoch": 0.8563394683026585,
11731
+ "grad_norm": 3.526838779449463,
11732
+ "learning_rate": 5.550004100195639e-06,
11733
+ "loss": 0.9025,
11734
+ "step": 1675
11735
+ },
11736
+ {
11737
+ "epoch": 0.8568507157464212,
11738
+ "grad_norm": 3.6863765716552734,
11739
+ "learning_rate": 5.5113135293435815e-06,
11740
+ "loss": 0.9744,
11741
+ "step": 1676
11742
+ },
11743
+ {
11744
+ "epoch": 0.8573619631901841,
11745
+ "grad_norm": 3.2295031547546387,
11746
+ "learning_rate": 5.4727504241756874e-06,
11747
+ "loss": 0.8475,
11748
+ "step": 1677
11749
+ },
11750
+ {
11751
+ "epoch": 0.8578732106339468,
11752
+ "grad_norm": 3.341581344604492,
11753
+ "learning_rate": 5.434314895180082e-06,
11754
+ "loss": 0.8515,
11755
+ "step": 1678
11756
+ },
11757
+ {
11758
+ "epoch": 0.8583844580777096,
11759
+ "grad_norm": 3.328876495361328,
11760
+ "learning_rate": 5.396007052479407e-06,
11761
+ "loss": 0.8321,
11762
+ "step": 1679
11763
+ },
11764
+ {
11765
+ "epoch": 0.8588957055214724,
11766
+ "grad_norm": 3.497668743133545,
11767
+ "learning_rate": 5.357827005830435e-06,
11768
+ "loss": 0.8929,
11769
+ "step": 1680
11770
+ },
11771
+ {
11772
+ "epoch": 0.8594069529652352,
11773
+ "grad_norm": 3.690748691558838,
11774
+ "learning_rate": 5.319774864623834e-06,
11775
+ "loss": 0.8603,
11776
+ "step": 1681
11777
+ },
11778
+ {
11779
+ "epoch": 0.8599182004089979,
11780
+ "grad_norm": 3.709012508392334,
11781
+ "learning_rate": 5.281850737883731e-06,
11782
+ "loss": 0.9677,
11783
+ "step": 1682
11784
+ },
11785
+ {
11786
+ "epoch": 0.8604294478527608,
11787
+ "grad_norm": 3.8703484535217285,
11788
+ "learning_rate": 5.2440547342675614e-06,
11789
+ "loss": 0.8876,
11790
+ "step": 1683
11791
+ },
11792
+ {
11793
+ "epoch": 0.8609406952965235,
11794
+ "grad_norm": 3.9959239959716797,
11795
+ "learning_rate": 5.206386962065602e-06,
11796
+ "loss": 0.791,
11797
+ "step": 1684
11798
+ },
11799
+ {
11800
+ "epoch": 0.8614519427402862,
11801
+ "grad_norm": 3.8929603099823,
11802
+ "learning_rate": 5.168847529200782e-06,
11803
+ "loss": 0.8654,
11804
+ "step": 1685
11805
+ },
11806
+ {
11807
+ "epoch": 0.8619631901840491,
11808
+ "grad_norm": 4.002877235412598,
11809
+ "learning_rate": 5.1314365432282904e-06,
11810
+ "loss": 0.8284,
11811
+ "step": 1686
11812
+ },
11813
+ {
11814
+ "epoch": 0.8624744376278118,
11815
+ "grad_norm": 4.066626071929932,
11816
+ "learning_rate": 5.094154111335292e-06,
11817
+ "loss": 0.8603,
11818
+ "step": 1687
11819
+ },
11820
+ {
11821
+ "epoch": 0.8629856850715747,
11822
+ "grad_norm": 4.232320785522461,
11823
+ "learning_rate": 5.057000340340678e-06,
11824
+ "loss": 0.9426,
11825
+ "step": 1688
11826
+ },
11827
+ {
11828
+ "epoch": 0.8634969325153374,
11829
+ "grad_norm": 3.965895175933838,
11830
+ "learning_rate": 5.019975336694649e-06,
11831
+ "loss": 0.7798,
11832
+ "step": 1689
11833
+ },
11834
+ {
11835
+ "epoch": 0.8640081799591002,
11836
+ "grad_norm": 4.249181270599365,
11837
+ "learning_rate": 4.983079206478513e-06,
11838
+ "loss": 0.8211,
11839
+ "step": 1690
11840
+ },
11841
+ {
11842
+ "epoch": 0.864519427402863,
11843
+ "grad_norm": 4.230957508087158,
11844
+ "learning_rate": 4.946312055404328e-06,
11845
+ "loss": 0.9142,
11846
+ "step": 1691
11847
+ },
11848
+ {
11849
+ "epoch": 0.8650306748466258,
11850
+ "grad_norm": 4.479368209838867,
11851
+ "learning_rate": 4.909673988814601e-06,
11852
+ "loss": 0.9803,
11853
+ "step": 1692
11854
+ },
11855
+ {
11856
+ "epoch": 0.8655419222903885,
11857
+ "grad_norm": 4.021700859069824,
11858
+ "learning_rate": 4.873165111681993e-06,
11859
+ "loss": 0.7705,
11860
+ "step": 1693
11861
+ },
11862
+ {
11863
+ "epoch": 0.8660531697341514,
11864
+ "grad_norm": 4.131351470947266,
11865
+ "learning_rate": 4.836785528609051e-06,
11866
+ "loss": 0.7206,
11867
+ "step": 1694
11868
+ },
11869
+ {
11870
+ "epoch": 0.8665644171779141,
11871
+ "grad_norm": 4.708834171295166,
11872
+ "learning_rate": 4.800535343827833e-06,
11873
+ "loss": 0.8698,
11874
+ "step": 1695
11875
+ },
11876
+ {
11877
+ "epoch": 0.8670756646216768,
11878
+ "grad_norm": 4.927844047546387,
11879
+ "learning_rate": 4.764414661199707e-06,
11880
+ "loss": 0.7615,
11881
+ "step": 1696
11882
+ },
11883
+ {
11884
+ "epoch": 0.8675869120654397,
11885
+ "grad_norm": 5.510052680969238,
11886
+ "learning_rate": 4.728423584214947e-06,
11887
+ "loss": 0.9676,
11888
+ "step": 1697
11889
+ },
11890
+ {
11891
+ "epoch": 0.8680981595092024,
11892
+ "grad_norm": 5.557268142700195,
11893
+ "learning_rate": 4.692562215992541e-06,
11894
+ "loss": 0.6631,
11895
+ "step": 1698
11896
+ },
11897
+ {
11898
+ "epoch": 0.8686094069529653,
11899
+ "grad_norm": 6.219789028167725,
11900
+ "learning_rate": 4.656830659279804e-06,
11901
+ "loss": 0.8028,
11902
+ "step": 1699
11903
+ },
11904
+ {
11905
+ "epoch": 0.869120654396728,
11906
+ "grad_norm": 5.629024028778076,
11907
+ "learning_rate": 4.621229016452156e-06,
11908
+ "loss": 0.4033,
11909
+ "step": 1700
11910
+ },
11911
+ {
11912
+ "epoch": 0.8696319018404908,
11913
+ "grad_norm": 1.9563968181610107,
11914
+ "learning_rate": 4.585757389512768e-06,
11915
+ "loss": 0.8627,
11916
+ "step": 1701
11917
+ },
11918
+ {
11919
+ "epoch": 0.8701431492842536,
11920
+ "grad_norm": 2.113006591796875,
11921
+ "learning_rate": 4.550415880092313e-06,
11922
+ "loss": 0.8872,
11923
+ "step": 1702
11924
+ },
11925
+ {
11926
+ "epoch": 0.8706543967280164,
11927
+ "grad_norm": 2.489034652709961,
11928
+ "learning_rate": 4.515204589448674e-06,
11929
+ "loss": 0.9124,
11930
+ "step": 1703
11931
+ },
11932
+ {
11933
+ "epoch": 0.8711656441717791,
11934
+ "grad_norm": 2.6170196533203125,
11935
+ "learning_rate": 4.48012361846662e-06,
11936
+ "loss": 1.0461,
11937
+ "step": 1704
11938
+ },
11939
+ {
11940
+ "epoch": 0.871676891615542,
11941
+ "grad_norm": 2.774226188659668,
11942
+ "learning_rate": 4.445173067657554e-06,
11943
+ "loss": 1.0186,
11944
+ "step": 1705
11945
+ },
11946
+ {
11947
+ "epoch": 0.8721881390593047,
11948
+ "grad_norm": 2.7719779014587402,
11949
+ "learning_rate": 4.410353037159193e-06,
11950
+ "loss": 0.9851,
11951
+ "step": 1706
11952
+ },
11953
+ {
11954
+ "epoch": 0.8726993865030674,
11955
+ "grad_norm": 2.8371808528900146,
11956
+ "learning_rate": 4.3756636267353214e-06,
11957
+ "loss": 1.0079,
11958
+ "step": 1707
11959
+ },
11960
+ {
11961
+ "epoch": 0.8732106339468303,
11962
+ "grad_norm": 3.129444122314453,
11963
+ "learning_rate": 4.341104935775442e-06,
11964
+ "loss": 0.8973,
11965
+ "step": 1708
11966
+ },
11967
+ {
11968
+ "epoch": 0.873721881390593,
11969
+ "grad_norm": 2.850924491882324,
11970
+ "learning_rate": 4.306677063294573e-06,
11971
+ "loss": 0.918,
11972
+ "step": 1709
11973
+ },
11974
+ {
11975
+ "epoch": 0.8742331288343558,
11976
+ "grad_norm": 2.839162826538086,
11977
+ "learning_rate": 4.272380107932888e-06,
11978
+ "loss": 0.9134,
11979
+ "step": 1710
11980
+ },
11981
+ {
11982
+ "epoch": 0.8747443762781186,
11983
+ "grad_norm": 3.149712562561035,
11984
+ "learning_rate": 4.238214167955484e-06,
11985
+ "loss": 0.9679,
11986
+ "step": 1711
11987
+ },
11988
+ {
11989
+ "epoch": 0.8752556237218814,
11990
+ "grad_norm": 2.945416212081909,
11991
+ "learning_rate": 4.2041793412520734e-06,
11992
+ "loss": 0.8602,
11993
+ "step": 1712
11994
+ },
11995
+ {
11996
+ "epoch": 0.8757668711656442,
11997
+ "grad_norm": 3.0397250652313232,
11998
+ "learning_rate": 4.17027572533672e-06,
11999
+ "loss": 0.9156,
12000
+ "step": 1713
12001
+ },
12002
+ {
12003
+ "epoch": 0.876278118609407,
12004
+ "grad_norm": 3.256863832473755,
12005
+ "learning_rate": 4.136503417347554e-06,
12006
+ "loss": 0.7461,
12007
+ "step": 1714
12008
+ },
12009
+ {
12010
+ "epoch": 0.8767893660531697,
12011
+ "grad_norm": 3.1958820819854736,
12012
+ "learning_rate": 4.102862514046474e-06,
12013
+ "loss": 0.8761,
12014
+ "step": 1715
12015
+ },
12016
+ {
12017
+ "epoch": 0.8773006134969326,
12018
+ "grad_norm": 3.4146556854248047,
12019
+ "learning_rate": 4.069353111818913e-06,
12020
+ "loss": 0.9106,
12021
+ "step": 1716
12022
+ },
12023
+ {
12024
+ "epoch": 0.8778118609406953,
12025
+ "grad_norm": 3.057286024093628,
12026
+ "learning_rate": 4.035975306673517e-06,
12027
+ "loss": 0.8755,
12028
+ "step": 1717
12029
+ },
12030
+ {
12031
+ "epoch": 0.878323108384458,
12032
+ "grad_norm": 3.0085763931274414,
12033
+ "learning_rate": 4.0027291942419055e-06,
12034
+ "loss": 0.7805,
12035
+ "step": 1718
12036
+ },
12037
+ {
12038
+ "epoch": 0.8788343558282209,
12039
+ "grad_norm": 3.2898752689361572,
12040
+ "learning_rate": 3.969614869778354e-06,
12041
+ "loss": 0.8877,
12042
+ "step": 1719
12043
+ },
12044
+ {
12045
+ "epoch": 0.8793456032719836,
12046
+ "grad_norm": 3.1025335788726807,
12047
+ "learning_rate": 3.936632428159609e-06,
12048
+ "loss": 0.808,
12049
+ "step": 1720
12050
+ },
12051
+ {
12052
+ "epoch": 0.8798568507157464,
12053
+ "grad_norm": 3.2930991649627686,
12054
+ "learning_rate": 3.903781963884467e-06,
12055
+ "loss": 0.8736,
12056
+ "step": 1721
12057
+ },
12058
+ {
12059
+ "epoch": 0.8803680981595092,
12060
+ "grad_norm": 3.3883445262908936,
12061
+ "learning_rate": 3.871063571073668e-06,
12062
+ "loss": 0.9969,
12063
+ "step": 1722
12064
+ },
12065
+ {
12066
+ "epoch": 0.880879345603272,
12067
+ "grad_norm": 3.6220452785491943,
12068
+ "learning_rate": 3.838477343469516e-06,
12069
+ "loss": 0.88,
12070
+ "step": 1723
12071
+ },
12072
+ {
12073
+ "epoch": 0.8813905930470347,
12074
+ "grad_norm": 3.937267303466797,
12075
+ "learning_rate": 3.8060233744356633e-06,
12076
+ "loss": 0.9832,
12077
+ "step": 1724
12078
+ },
12079
+ {
12080
+ "epoch": 0.8819018404907976,
12081
+ "grad_norm": 3.5126397609710693,
12082
+ "learning_rate": 3.77370175695681e-06,
12083
+ "loss": 0.8513,
12084
+ "step": 1725
12085
+ },
12086
+ {
12087
+ "epoch": 0.8824130879345603,
12088
+ "grad_norm": 3.5317177772521973,
12089
+ "learning_rate": 3.74151258363844e-06,
12090
+ "loss": 0.8387,
12091
+ "step": 1726
12092
+ },
12093
+ {
12094
+ "epoch": 0.8829243353783232,
12095
+ "grad_norm": 3.232076406478882,
12096
+ "learning_rate": 3.7094559467066083e-06,
12097
+ "loss": 0.8725,
12098
+ "step": 1727
12099
+ },
12100
+ {
12101
+ "epoch": 0.8834355828220859,
12102
+ "grad_norm": 3.6095595359802246,
12103
+ "learning_rate": 3.6775319380076e-06,
12104
+ "loss": 0.9068,
12105
+ "step": 1728
12106
+ },
12107
+ {
12108
+ "epoch": 0.8839468302658486,
12109
+ "grad_norm": 3.2999720573425293,
12110
+ "learning_rate": 3.645740649007734e-06,
12111
+ "loss": 0.806,
12112
+ "step": 1729
12113
+ },
12114
+ {
12115
+ "epoch": 0.8844580777096115,
12116
+ "grad_norm": 3.733455181121826,
12117
+ "learning_rate": 3.614082170793021e-06,
12118
+ "loss": 0.8415,
12119
+ "step": 1730
12120
+ },
12121
+ {
12122
+ "epoch": 0.8849693251533742,
12123
+ "grad_norm": 3.5478620529174805,
12124
+ "learning_rate": 3.5825565940690087e-06,
12125
+ "loss": 0.8471,
12126
+ "step": 1731
12127
+ },
12128
+ {
12129
+ "epoch": 0.885480572597137,
12130
+ "grad_norm": 3.889519214630127,
12131
+ "learning_rate": 3.551164009160429e-06,
12132
+ "loss": 0.8128,
12133
+ "step": 1732
12134
+ },
12135
+ {
12136
+ "epoch": 0.8859918200408998,
12137
+ "grad_norm": 3.7842485904693604,
12138
+ "learning_rate": 3.5199045060110013e-06,
12139
+ "loss": 0.9556,
12140
+ "step": 1733
12141
+ },
12142
+ {
12143
+ "epoch": 0.8865030674846626,
12144
+ "grad_norm": 3.5816397666931152,
12145
+ "learning_rate": 3.488778174183116e-06,
12146
+ "loss": 0.8108,
12147
+ "step": 1734
12148
+ },
12149
+ {
12150
+ "epoch": 0.8870143149284253,
12151
+ "grad_norm": 3.812117338180542,
12152
+ "learning_rate": 3.4577851028576523e-06,
12153
+ "loss": 0.7997,
12154
+ "step": 1735
12155
+ },
12156
+ {
12157
+ "epoch": 0.8875255623721882,
12158
+ "grad_norm": 3.7888643741607666,
12159
+ "learning_rate": 3.4269253808336455e-06,
12160
+ "loss": 0.8451,
12161
+ "step": 1736
12162
+ },
12163
+ {
12164
+ "epoch": 0.8880368098159509,
12165
+ "grad_norm": 3.8936595916748047,
12166
+ "learning_rate": 3.3961990965280745e-06,
12167
+ "loss": 0.8154,
12168
+ "step": 1737
12169
+ },
12170
+ {
12171
+ "epoch": 0.8885480572597138,
12172
+ "grad_norm": 4.134406566619873,
12173
+ "learning_rate": 3.36560633797563e-06,
12174
+ "loss": 0.8866,
12175
+ "step": 1738
12176
+ },
12177
+ {
12178
+ "epoch": 0.8890593047034765,
12179
+ "grad_norm": 4.125121116638184,
12180
+ "learning_rate": 3.335147192828403e-06,
12181
+ "loss": 0.7727,
12182
+ "step": 1739
12183
+ },
12184
+ {
12185
+ "epoch": 0.8895705521472392,
12186
+ "grad_norm": 4.411681175231934,
12187
+ "learning_rate": 3.3048217483556744e-06,
12188
+ "loss": 0.8563,
12189
+ "step": 1740
12190
+ },
12191
+ {
12192
+ "epoch": 0.8900817995910021,
12193
+ "grad_norm": 3.7549989223480225,
12194
+ "learning_rate": 3.2746300914436534e-06,
12195
+ "loss": 0.7357,
12196
+ "step": 1741
12197
+ },
12198
+ {
12199
+ "epoch": 0.8905930470347648,
12200
+ "grad_norm": 4.538980960845947,
12201
+ "learning_rate": 3.2445723085952504e-06,
12202
+ "loss": 0.818,
12203
+ "step": 1742
12204
+ },
12205
+ {
12206
+ "epoch": 0.8911042944785276,
12207
+ "grad_norm": 4.389502048492432,
12208
+ "learning_rate": 3.214648485929783e-06,
12209
+ "loss": 0.8799,
12210
+ "step": 1743
12211
+ },
12212
+ {
12213
+ "epoch": 0.8916155419222904,
12214
+ "grad_norm": 4.548320293426514,
12215
+ "learning_rate": 3.184858709182775e-06,
12216
+ "loss": 0.6832,
12217
+ "step": 1744
12218
+ },
12219
+ {
12220
+ "epoch": 0.8921267893660532,
12221
+ "grad_norm": 4.614025115966797,
12222
+ "learning_rate": 3.1552030637056806e-06,
12223
+ "loss": 0.8305,
12224
+ "step": 1745
12225
+ },
12226
+ {
12227
+ "epoch": 0.8926380368098159,
12228
+ "grad_norm": 4.867095470428467,
12229
+ "learning_rate": 3.1256816344656602e-06,
12230
+ "loss": 0.8145,
12231
+ "step": 1746
12232
+ },
12233
+ {
12234
+ "epoch": 0.8931492842535788,
12235
+ "grad_norm": 4.788604259490967,
12236
+ "learning_rate": 3.096294506045311e-06,
12237
+ "loss": 0.8077,
12238
+ "step": 1747
12239
+ },
12240
+ {
12241
+ "epoch": 0.8936605316973415,
12242
+ "grad_norm": 5.478014945983887,
12243
+ "learning_rate": 3.067041762642475e-06,
12244
+ "loss": 0.771,
12245
+ "step": 1748
12246
+ },
12247
+ {
12248
+ "epoch": 0.8941717791411042,
12249
+ "grad_norm": 6.572366237640381,
12250
+ "learning_rate": 3.037923488069927e-06,
12251
+ "loss": 0.7604,
12252
+ "step": 1749
12253
+ },
12254
+ {
12255
+ "epoch": 0.8946830265848671,
12256
+ "grad_norm": 5.753463268280029,
12257
+ "learning_rate": 3.0089397657551865e-06,
12258
+ "loss": 0.4072,
12259
+ "step": 1750
12260
+ },
12261
+ {
12262
+ "epoch": 0.8951942740286298,
12263
+ "grad_norm": 1.9616332054138184,
12264
+ "learning_rate": 2.9800906787402716e-06,
12265
+ "loss": 0.9257,
12266
+ "step": 1751
12267
+ },
12268
+ {
12269
+ "epoch": 0.8957055214723927,
12270
+ "grad_norm": 2.392256259918213,
12271
+ "learning_rate": 2.9513763096814305e-06,
12272
+ "loss": 1.0013,
12273
+ "step": 1752
12274
+ },
12275
+ {
12276
+ "epoch": 0.8962167689161554,
12277
+ "grad_norm": 2.3039324283599854,
12278
+ "learning_rate": 2.9227967408489653e-06,
12279
+ "loss": 0.9961,
12280
+ "step": 1753
12281
+ },
12282
+ {
12283
+ "epoch": 0.8967280163599182,
12284
+ "grad_norm": 2.534642457962036,
12285
+ "learning_rate": 2.89435205412692e-06,
12286
+ "loss": 0.9289,
12287
+ "step": 1754
12288
+ },
12289
+ {
12290
+ "epoch": 0.897239263803681,
12291
+ "grad_norm": 2.941516160964966,
12292
+ "learning_rate": 2.8660423310129135e-06,
12293
+ "loss": 0.957,
12294
+ "step": 1755
12295
+ },
12296
+ {
12297
+ "epoch": 0.8977505112474438,
12298
+ "grad_norm": 2.806290626525879,
12299
+ "learning_rate": 2.8378676526178482e-06,
12300
+ "loss": 1.0622,
12301
+ "step": 1756
12302
+ },
12303
+ {
12304
+ "epoch": 0.8982617586912065,
12305
+ "grad_norm": 2.6641461849212646,
12306
+ "learning_rate": 2.8098280996657456e-06,
12307
+ "loss": 0.9309,
12308
+ "step": 1757
12309
+ },
12310
+ {
12311
+ "epoch": 0.8987730061349694,
12312
+ "grad_norm": 2.6802642345428467,
12313
+ "learning_rate": 2.781923752493437e-06,
12314
+ "loss": 0.8945,
12315
+ "step": 1758
12316
+ },
12317
+ {
12318
+ "epoch": 0.8992842535787321,
12319
+ "grad_norm": 2.851447820663452,
12320
+ "learning_rate": 2.754154691050387e-06,
12321
+ "loss": 0.8722,
12322
+ "step": 1759
12323
+ },
12324
+ {
12325
+ "epoch": 0.8997955010224948,
12326
+ "grad_norm": 2.958563804626465,
12327
+ "learning_rate": 2.7265209948984514e-06,
12328
+ "loss": 0.8529,
12329
+ "step": 1760
12330
+ },
12331
+ {
12332
+ "epoch": 0.9003067484662577,
12333
+ "grad_norm": 2.867089033126831,
12334
+ "learning_rate": 2.6990227432116544e-06,
12335
+ "loss": 0.8957,
12336
+ "step": 1761
12337
+ },
12338
+ {
12339
+ "epoch": 0.9008179959100204,
12340
+ "grad_norm": 3.3198704719543457,
12341
+ "learning_rate": 2.671660014775934e-06,
12342
+ "loss": 0.8905,
12343
+ "step": 1762
12344
+ },
12345
+ {
12346
+ "epoch": 0.9013292433537833,
12347
+ "grad_norm": 3.0108888149261475,
12348
+ "learning_rate": 2.6444328879889622e-06,
12349
+ "loss": 0.8434,
12350
+ "step": 1763
12351
+ },
12352
+ {
12353
+ "epoch": 0.901840490797546,
12354
+ "grad_norm": 2.9155540466308594,
12355
+ "learning_rate": 2.6173414408598827e-06,
12356
+ "loss": 0.8414,
12357
+ "step": 1764
12358
+ },
12359
+ {
12360
+ "epoch": 0.9023517382413088,
12361
+ "grad_norm": 3.178889036178589,
12362
+ "learning_rate": 2.5903857510090835e-06,
12363
+ "loss": 0.9461,
12364
+ "step": 1765
12365
+ },
12366
+ {
12367
+ "epoch": 0.9028629856850716,
12368
+ "grad_norm": 3.119640588760376,
12369
+ "learning_rate": 2.56356589566803e-06,
12370
+ "loss": 0.8996,
12371
+ "step": 1766
12372
+ },
12373
+ {
12374
+ "epoch": 0.9033742331288344,
12375
+ "grad_norm": 3.1831552982330322,
12376
+ "learning_rate": 2.53688195167896e-06,
12377
+ "loss": 0.9139,
12378
+ "step": 1767
12379
+ },
12380
+ {
12381
+ "epoch": 0.9038854805725971,
12382
+ "grad_norm": 3.3082258701324463,
12383
+ "learning_rate": 2.5103339954947626e-06,
12384
+ "loss": 0.8465,
12385
+ "step": 1768
12386
+ },
12387
+ {
12388
+ "epoch": 0.90439672801636,
12389
+ "grad_norm": 3.1402430534362793,
12390
+ "learning_rate": 2.483922103178632e-06,
12391
+ "loss": 0.9071,
12392
+ "step": 1769
12393
+ },
12394
+ {
12395
+ "epoch": 0.9049079754601227,
12396
+ "grad_norm": 3.3881165981292725,
12397
+ "learning_rate": 2.4576463504039913e-06,
12398
+ "loss": 0.9479,
12399
+ "step": 1770
12400
+ },
12401
+ {
12402
+ "epoch": 0.9054192229038854,
12403
+ "grad_norm": 3.3849008083343506,
12404
+ "learning_rate": 2.4315068124541597e-06,
12405
+ "loss": 0.8833,
12406
+ "step": 1771
12407
+ },
12408
+ {
12409
+ "epoch": 0.9059304703476483,
12410
+ "grad_norm": 3.725773334503174,
12411
+ "learning_rate": 2.4055035642222224e-06,
12412
+ "loss": 0.8946,
12413
+ "step": 1772
12414
+ },
12415
+ {
12416
+ "epoch": 0.906441717791411,
12417
+ "grad_norm": 4.276130676269531,
12418
+ "learning_rate": 2.3796366802107394e-06,
12419
+ "loss": 0.9649,
12420
+ "step": 1773
12421
+ },
12422
+ {
12423
+ "epoch": 0.9069529652351738,
12424
+ "grad_norm": 3.543367862701416,
12425
+ "learning_rate": 2.3539062345316e-06,
12426
+ "loss": 0.8461,
12427
+ "step": 1774
12428
+ },
12429
+ {
12430
+ "epoch": 0.9074642126789366,
12431
+ "grad_norm": 3.076871156692505,
12432
+ "learning_rate": 2.3283123009057607e-06,
12433
+ "loss": 0.8243,
12434
+ "step": 1775
12435
+ },
12436
+ {
12437
+ "epoch": 0.9079754601226994,
12438
+ "grad_norm": 3.6242263317108154,
12439
+ "learning_rate": 2.3028549526630583e-06,
12440
+ "loss": 0.8345,
12441
+ "step": 1776
12442
+ },
12443
+ {
12444
+ "epoch": 0.9084867075664622,
12445
+ "grad_norm": 3.5291264057159424,
12446
+ "learning_rate": 2.277534262742015e-06,
12447
+ "loss": 0.892,
12448
+ "step": 1777
12449
+ },
12450
+ {
12451
+ "epoch": 0.908997955010225,
12452
+ "grad_norm": 3.4097137451171875,
12453
+ "learning_rate": 2.2523503036895764e-06,
12454
+ "loss": 0.8539,
12455
+ "step": 1778
12456
+ },
12457
+ {
12458
+ "epoch": 0.9095092024539877,
12459
+ "grad_norm": 3.7175967693328857,
12460
+ "learning_rate": 2.227303147660964e-06,
12461
+ "loss": 0.9083,
12462
+ "step": 1779
12463
+ },
12464
+ {
12465
+ "epoch": 0.9100204498977505,
12466
+ "grad_norm": 3.9345943927764893,
12467
+ "learning_rate": 2.202392866419423e-06,
12468
+ "loss": 0.9569,
12469
+ "step": 1780
12470
+ },
12471
+ {
12472
+ "epoch": 0.9105316973415133,
12473
+ "grad_norm": 3.6897552013397217,
12474
+ "learning_rate": 2.1776195313360505e-06,
12475
+ "loss": 0.8444,
12476
+ "step": 1781
12477
+ },
12478
+ {
12479
+ "epoch": 0.911042944785276,
12480
+ "grad_norm": 3.9733729362487793,
12481
+ "learning_rate": 2.152983213389559e-06,
12482
+ "loss": 0.9426,
12483
+ "step": 1782
12484
+ },
12485
+ {
12486
+ "epoch": 0.9115541922290389,
12487
+ "grad_norm": 4.125808238983154,
12488
+ "learning_rate": 2.1284839831661075e-06,
12489
+ "loss": 0.8886,
12490
+ "step": 1783
12491
+ },
12492
+ {
12493
+ "epoch": 0.9120654396728016,
12494
+ "grad_norm": 3.4890663623809814,
12495
+ "learning_rate": 2.1041219108590692e-06,
12496
+ "loss": 0.8138,
12497
+ "step": 1784
12498
+ },
12499
+ {
12500
+ "epoch": 0.9125766871165644,
12501
+ "grad_norm": 3.9719114303588867,
12502
+ "learning_rate": 2.0798970662688545e-06,
12503
+ "loss": 0.8747,
12504
+ "step": 1785
12505
+ },
12506
+ {
12507
+ "epoch": 0.9130879345603272,
12508
+ "grad_norm": 4.040510654449463,
12509
+ "learning_rate": 2.055809518802676e-06,
12510
+ "loss": 1.0,
12511
+ "step": 1786
12512
+ },
12513
+ {
12514
+ "epoch": 0.91359918200409,
12515
+ "grad_norm": 4.081876754760742,
12516
+ "learning_rate": 2.031859337474407e-06,
12517
+ "loss": 0.7761,
12518
+ "step": 1787
12519
+ },
12520
+ {
12521
+ "epoch": 0.9141104294478528,
12522
+ "grad_norm": 4.227837562561035,
12523
+ "learning_rate": 2.0080465909043113e-06,
12524
+ "loss": 0.8716,
12525
+ "step": 1788
12526
+ },
12527
+ {
12528
+ "epoch": 0.9146216768916156,
12529
+ "grad_norm": 4.269420623779297,
12530
+ "learning_rate": 1.984371347318914e-06,
12531
+ "loss": 0.8066,
12532
+ "step": 1789
12533
+ },
12534
+ {
12535
+ "epoch": 0.9151329243353783,
12536
+ "grad_norm": 4.166466236114502,
12537
+ "learning_rate": 1.9608336745507716e-06,
12538
+ "loss": 0.802,
12539
+ "step": 1790
12540
+ },
12541
+ {
12542
+ "epoch": 0.9156441717791411,
12543
+ "grad_norm": 4.287985801696777,
12544
+ "learning_rate": 1.937433640038261e-06,
12545
+ "loss": 0.7958,
12546
+ "step": 1791
12547
+ },
12548
+ {
12549
+ "epoch": 0.9161554192229039,
12550
+ "grad_norm": 4.09846830368042,
12551
+ "learning_rate": 1.914171310825441e-06,
12552
+ "loss": 0.7164,
12553
+ "step": 1792
12554
+ },
12555
+ {
12556
+ "epoch": 0.9166666666666666,
12557
+ "grad_norm": 4.546944618225098,
12558
+ "learning_rate": 1.8910467535617983e-06,
12559
+ "loss": 0.7325,
12560
+ "step": 1793
12561
+ },
12562
+ {
12563
+ "epoch": 0.9171779141104295,
12564
+ "grad_norm": 4.4474897384643555,
12565
+ "learning_rate": 1.8680600345021171e-06,
12566
+ "loss": 0.7291,
12567
+ "step": 1794
12568
+ },
12569
+ {
12570
+ "epoch": 0.9176891615541922,
12571
+ "grad_norm": 4.722184658050537,
12572
+ "learning_rate": 1.845211219506221e-06,
12573
+ "loss": 0.6715,
12574
+ "step": 1795
12575
+ },
12576
+ {
12577
+ "epoch": 0.918200408997955,
12578
+ "grad_norm": 5.06058931350708,
12579
+ "learning_rate": 1.8225003740388547e-06,
12580
+ "loss": 0.9135,
12581
+ "step": 1796
12582
+ },
12583
+ {
12584
+ "epoch": 0.9187116564417178,
12585
+ "grad_norm": 5.043420791625977,
12586
+ "learning_rate": 1.79992756316944e-06,
12587
+ "loss": 0.7258,
12588
+ "step": 1797
12589
+ },
12590
+ {
12591
+ "epoch": 0.9192229038854806,
12592
+ "grad_norm": 5.176082134246826,
12593
+ "learning_rate": 1.7774928515719157e-06,
12594
+ "loss": 0.7435,
12595
+ "step": 1798
12596
+ },
12597
+ {
12598
+ "epoch": 0.9197341513292433,
12599
+ "grad_norm": 6.064294815063477,
12600
+ "learning_rate": 1.7551963035245588e-06,
12601
+ "loss": 0.7142,
12602
+ "step": 1799
12603
+ },
12604
+ {
12605
+ "epoch": 0.9202453987730062,
12606
+ "grad_norm": 6.77680778503418,
12607
+ "learning_rate": 1.733037982909791e-06,
12608
+ "loss": 0.4713,
12609
+ "step": 1800
12610
+ },
12611
+ {
12612
+ "epoch": 0.9207566462167689,
12613
+ "grad_norm": 1.9696067571640015,
12614
+ "learning_rate": 1.7110179532139781e-06,
12615
+ "loss": 0.8423,
12616
+ "step": 1801
12617
+ },
12618
+ {
12619
+ "epoch": 0.9212678936605317,
12620
+ "grad_norm": 2.4090499877929688,
12621
+ "learning_rate": 1.6891362775272812e-06,
12622
+ "loss": 0.9596,
12623
+ "step": 1802
12624
+ },
12625
+ {
12626
+ "epoch": 0.9217791411042945,
12627
+ "grad_norm": 2.441483974456787,
12628
+ "learning_rate": 1.6673930185434561e-06,
12629
+ "loss": 0.8638,
12630
+ "step": 1803
12631
+ },
12632
+ {
12633
+ "epoch": 0.9222903885480572,
12634
+ "grad_norm": 2.631037712097168,
12635
+ "learning_rate": 1.6457882385596646e-06,
12636
+ "loss": 1.0107,
12637
+ "step": 1804
12638
+ },
12639
+ {
12640
+ "epoch": 0.9228016359918201,
12641
+ "grad_norm": 2.69331431388855,
12642
+ "learning_rate": 1.6243219994763304e-06,
12643
+ "loss": 0.9641,
12644
+ "step": 1805
12645
+ },
12646
+ {
12647
+ "epoch": 0.9233128834355828,
12648
+ "grad_norm": 2.4592180252075195,
12649
+ "learning_rate": 1.6029943627969223e-06,
12650
+ "loss": 0.8723,
12651
+ "step": 1806
12652
+ },
12653
+ {
12654
+ "epoch": 0.9238241308793456,
12655
+ "grad_norm": 2.729130744934082,
12656
+ "learning_rate": 1.5818053896278162e-06,
12657
+ "loss": 0.9765,
12658
+ "step": 1807
12659
+ },
12660
+ {
12661
+ "epoch": 0.9243353783231084,
12662
+ "grad_norm": 2.6787896156311035,
12663
+ "learning_rate": 1.5607551406780717e-06,
12664
+ "loss": 0.8601,
12665
+ "step": 1808
12666
+ },
12667
+ {
12668
+ "epoch": 0.9248466257668712,
12669
+ "grad_norm": 2.9896671772003174,
12670
+ "learning_rate": 1.5398436762593061e-06,
12671
+ "loss": 0.8525,
12672
+ "step": 1809
12673
+ },
12674
+ {
12675
+ "epoch": 0.9253578732106339,
12676
+ "grad_norm": 3.0496749877929688,
12677
+ "learning_rate": 1.519071056285487e-06,
12678
+ "loss": 0.9679,
12679
+ "step": 1810
12680
+ },
12681
+ {
12682
+ "epoch": 0.9258691206543967,
12683
+ "grad_norm": 2.976200819015503,
12684
+ "learning_rate": 1.4984373402728014e-06,
12685
+ "loss": 0.8789,
12686
+ "step": 1811
12687
+ },
12688
+ {
12689
+ "epoch": 0.9263803680981595,
12690
+ "grad_norm": 3.0850539207458496,
12691
+ "learning_rate": 1.4779425873394259e-06,
12692
+ "loss": 0.85,
12693
+ "step": 1812
12694
+ },
12695
+ {
12696
+ "epoch": 0.9268916155419223,
12697
+ "grad_norm": 3.117995023727417,
12698
+ "learning_rate": 1.4575868562054228e-06,
12699
+ "loss": 0.8234,
12700
+ "step": 1813
12701
+ },
12702
+ {
12703
+ "epoch": 0.9274028629856851,
12704
+ "grad_norm": 2.9029812812805176,
12705
+ "learning_rate": 1.4373702051925065e-06,
12706
+ "loss": 0.9211,
12707
+ "step": 1814
12708
+ },
12709
+ {
12710
+ "epoch": 0.9279141104294478,
12711
+ "grad_norm": 3.243557929992676,
12712
+ "learning_rate": 1.4172926922239315e-06,
12713
+ "loss": 0.8936,
12714
+ "step": 1815
12715
+ },
12716
+ {
12717
+ "epoch": 0.9284253578732107,
12718
+ "grad_norm": 2.932993173599243,
12719
+ "learning_rate": 1.3973543748243e-06,
12720
+ "loss": 0.8516,
12721
+ "step": 1816
12722
+ },
12723
+ {
12724
+ "epoch": 0.9289366053169734,
12725
+ "grad_norm": 3.073636054992676,
12726
+ "learning_rate": 1.377555310119405e-06,
12727
+ "loss": 0.8922,
12728
+ "step": 1817
12729
+ },
12730
+ {
12731
+ "epoch": 0.9294478527607362,
12732
+ "grad_norm": 3.3185694217681885,
12733
+ "learning_rate": 1.3578955548360473e-06,
12734
+ "loss": 0.8148,
12735
+ "step": 1818
12736
+ },
12737
+ {
12738
+ "epoch": 0.929959100204499,
12739
+ "grad_norm": 3.1685848236083984,
12740
+ "learning_rate": 1.3383751653019029e-06,
12741
+ "loss": 0.9387,
12742
+ "step": 1819
12743
+ },
12744
+ {
12745
+ "epoch": 0.9304703476482618,
12746
+ "grad_norm": 3.10244083404541,
12747
+ "learning_rate": 1.31899419744535e-06,
12748
+ "loss": 0.7719,
12749
+ "step": 1820
12750
+ },
12751
+ {
12752
+ "epoch": 0.9309815950920245,
12753
+ "grad_norm": 3.1995668411254883,
12754
+ "learning_rate": 1.2997527067952875e-06,
12755
+ "loss": 0.9624,
12756
+ "step": 1821
12757
+ },
12758
+ {
12759
+ "epoch": 0.9314928425357873,
12760
+ "grad_norm": 3.6962995529174805,
12761
+ "learning_rate": 1.2806507484810215e-06,
12762
+ "loss": 0.8737,
12763
+ "step": 1822
12764
+ },
12765
+ {
12766
+ "epoch": 0.9320040899795501,
12767
+ "grad_norm": 3.9050567150115967,
12768
+ "learning_rate": 1.2616883772320508e-06,
12769
+ "loss": 1.0082,
12770
+ "step": 1823
12771
+ },
12772
+ {
12773
+ "epoch": 0.9325153374233128,
12774
+ "grad_norm": 3.417041540145874,
12775
+ "learning_rate": 1.2428656473779721e-06,
12776
+ "loss": 0.8719,
12777
+ "step": 1824
12778
+ },
12779
+ {
12780
+ "epoch": 0.9330265848670757,
12781
+ "grad_norm": 3.355666160583496,
12782
+ "learning_rate": 1.2241826128482625e-06,
12783
+ "loss": 0.9281,
12784
+ "step": 1825
12785
+ },
12786
+ {
12787
+ "epoch": 0.9335378323108384,
12788
+ "grad_norm": 3.6375527381896973,
12789
+ "learning_rate": 1.20563932717217e-06,
12790
+ "loss": 0.8613,
12791
+ "step": 1826
12792
+ },
12793
+ {
12794
+ "epoch": 0.9340490797546013,
12795
+ "grad_norm": 3.6801555156707764,
12796
+ "learning_rate": 1.1872358434785346e-06,
12797
+ "loss": 0.7958,
12798
+ "step": 1827
12799
+ },
12800
+ {
12801
+ "epoch": 0.934560327198364,
12802
+ "grad_norm": 3.775987148284912,
12803
+ "learning_rate": 1.1689722144956671e-06,
12804
+ "loss": 0.8864,
12805
+ "step": 1828
12806
+ },
12807
+ {
12808
+ "epoch": 0.9350715746421268,
12809
+ "grad_norm": 3.3887085914611816,
12810
+ "learning_rate": 1.1508484925511542e-06,
12811
+ "loss": 0.8428,
12812
+ "step": 1829
12813
+ },
12814
+ {
12815
+ "epoch": 0.9355828220858896,
12816
+ "grad_norm": 3.701101064682007,
12817
+ "learning_rate": 1.132864729571731e-06,
12818
+ "loss": 0.8598,
12819
+ "step": 1830
12820
+ },
12821
+ {
12822
+ "epoch": 0.9360940695296524,
12823
+ "grad_norm": 3.671170711517334,
12824
+ "learning_rate": 1.1150209770831588e-06,
12825
+ "loss": 0.8273,
12826
+ "step": 1831
12827
+ },
12828
+ {
12829
+ "epoch": 0.9366053169734151,
12830
+ "grad_norm": 3.679232120513916,
12831
+ "learning_rate": 1.0973172862100145e-06,
12832
+ "loss": 0.899,
12833
+ "step": 1832
12834
+ },
12835
+ {
12836
+ "epoch": 0.9371165644171779,
12837
+ "grad_norm": 3.9870572090148926,
12838
+ "learning_rate": 1.0797537076756127e-06,
12839
+ "loss": 0.9478,
12840
+ "step": 1833
12841
+ },
12842
+ {
12843
+ "epoch": 0.9376278118609407,
12844
+ "grad_norm": 3.767942190170288,
12845
+ "learning_rate": 1.0623302918018108e-06,
12846
+ "loss": 0.8134,
12847
+ "step": 1834
12848
+ },
12849
+ {
12850
+ "epoch": 0.9381390593047034,
12851
+ "grad_norm": 4.18915319442749,
12852
+ "learning_rate": 1.0450470885088937e-06,
12853
+ "loss": 0.9433,
12854
+ "step": 1835
12855
+ },
12856
+ {
12857
+ "epoch": 0.9386503067484663,
12858
+ "grad_norm": 4.106919765472412,
12859
+ "learning_rate": 1.0279041473154116e-06,
12860
+ "loss": 0.8786,
12861
+ "step": 1836
12862
+ },
12863
+ {
12864
+ "epoch": 0.939161554192229,
12865
+ "grad_norm": 4.100769519805908,
12866
+ "learning_rate": 1.010901517338042e-06,
12867
+ "loss": 0.8459,
12868
+ "step": 1837
12869
+ },
12870
+ {
12871
+ "epoch": 0.9396728016359919,
12872
+ "grad_norm": 3.8171629905700684,
12873
+ "learning_rate": 9.94039247291456e-07,
12874
+ "loss": 0.7617,
12875
+ "step": 1838
12876
+ },
12877
+ {
12878
+ "epoch": 0.9401840490797546,
12879
+ "grad_norm": 4.406624794006348,
12880
+ "learning_rate": 9.773173854881913e-07,
12881
+ "loss": 0.7923,
12882
+ "step": 1839
12883
+ },
12884
+ {
12885
+ "epoch": 0.9406952965235174,
12886
+ "grad_norm": 4.20837926864624,
12887
+ "learning_rate": 9.607359798384785e-07,
12888
+ "loss": 0.9773,
12889
+ "step": 1840
12890
+ },
12891
+ {
12892
+ "epoch": 0.9412065439672802,
12893
+ "grad_norm": 4.484842777252197,
12894
+ "learning_rate": 9.442950778501325e-07,
12895
+ "loss": 0.8711,
12896
+ "step": 1841
12897
+ },
12898
+ {
12899
+ "epoch": 0.941717791411043,
12900
+ "grad_norm": 4.719925403594971,
12901
+ "learning_rate": 9.279947266284061e-07,
12902
+ "loss": 0.8392,
12903
+ "step": 1842
12904
+ },
12905
+ {
12906
+ "epoch": 0.9422290388548057,
12907
+ "grad_norm": 4.476531982421875,
12908
+ "learning_rate": 9.118349728758468e-07,
12909
+ "loss": 0.767,
12910
+ "step": 1843
12911
+ },
12912
+ {
12913
+ "epoch": 0.9427402862985685,
12914
+ "grad_norm": 4.48267126083374,
12915
+ "learning_rate": 8.958158628922019e-07,
12916
+ "loss": 0.6777,
12917
+ "step": 1844
12918
+ },
12919
+ {
12920
+ "epoch": 0.9432515337423313,
12921
+ "grad_norm": 4.858860015869141,
12922
+ "learning_rate": 8.799374425742246e-07,
12923
+ "loss": 0.7469,
12924
+ "step": 1845
12925
+ },
12926
+ {
12927
+ "epoch": 0.943762781186094,
12928
+ "grad_norm": 4.577967166900635,
12929
+ "learning_rate": 8.641997574155846e-07,
12930
+ "loss": 0.676,
12931
+ "step": 1846
12932
+ },
12933
+ {
12934
+ "epoch": 0.9442740286298569,
12935
+ "grad_norm": 5.150705337524414,
12936
+ "learning_rate": 8.486028525067358e-07,
12937
+ "loss": 0.7106,
12938
+ "step": 1847
12939
+ },
12940
+ {
12941
+ "epoch": 0.9447852760736196,
12942
+ "grad_norm": 4.760867595672607,
12943
+ "learning_rate": 8.331467725347708e-07,
12944
+ "loss": 0.7528,
12945
+ "step": 1848
12946
+ },
12947
+ {
12948
+ "epoch": 0.9452965235173824,
12949
+ "grad_norm": 6.085768222808838,
12950
+ "learning_rate": 8.178315617832999e-07,
12951
+ "loss": 0.784,
12952
+ "step": 1849
12953
+ },
12954
+ {
12955
+ "epoch": 0.9458077709611452,
12956
+ "grad_norm": 6.886128902435303,
12957
+ "learning_rate": 8.026572641323393e-07,
12958
+ "loss": 0.5051,
12959
+ "step": 1850
12960
+ },
12961
+ {
12962
+ "epoch": 0.946319018404908,
12963
+ "grad_norm": 2.127772808074951,
12964
+ "learning_rate": 7.876239230581506e-07,
12965
+ "loss": 0.9763,
12966
+ "step": 1851
12967
+ },
12968
+ {
12969
+ "epoch": 0.9468302658486708,
12970
+ "grad_norm": 2.3633391857147217,
12971
+ "learning_rate": 7.727315816331515e-07,
12972
+ "loss": 1.012,
12973
+ "step": 1852
12974
+ },
12975
+ {
12976
+ "epoch": 0.9473415132924335,
12977
+ "grad_norm": 2.347792625427246,
12978
+ "learning_rate": 7.579802825257775e-07,
12979
+ "loss": 0.9064,
12980
+ "step": 1853
12981
+ },
12982
+ {
12983
+ "epoch": 0.9478527607361963,
12984
+ "grad_norm": 2.4126203060150146,
12985
+ "learning_rate": 7.43370068000343e-07,
12986
+ "loss": 0.8778,
12987
+ "step": 1854
12988
+ },
12989
+ {
12990
+ "epoch": 0.9483640081799591,
12991
+ "grad_norm": 2.6466588973999023,
12992
+ "learning_rate": 7.289009799169688e-07,
12993
+ "loss": 0.9782,
12994
+ "step": 1855
12995
+ },
12996
+ {
12997
+ "epoch": 0.9488752556237219,
12998
+ "grad_norm": 2.6971752643585205,
12999
+ "learning_rate": 7.145730597314049e-07,
13000
+ "loss": 0.979,
13001
+ "step": 1856
13002
+ },
13003
+ {
13004
+ "epoch": 0.9493865030674846,
13005
+ "grad_norm": 2.6623589992523193,
13006
+ "learning_rate": 7.003863484949413e-07,
13007
+ "loss": 0.9151,
13008
+ "step": 1857
13009
+ },
13010
+ {
13011
+ "epoch": 0.9498977505112475,
13012
+ "grad_norm": 2.865541696548462,
13013
+ "learning_rate": 6.86340886854292e-07,
13014
+ "loss": 0.8949,
13015
+ "step": 1858
13016
+ },
13017
+ {
13018
+ "epoch": 0.9504089979550102,
13019
+ "grad_norm": 3.0056710243225098,
13020
+ "learning_rate": 6.724367150514777e-07,
13021
+ "loss": 0.8936,
13022
+ "step": 1859
13023
+ },
13024
+ {
13025
+ "epoch": 0.950920245398773,
13026
+ "grad_norm": 2.805217742919922,
13027
+ "learning_rate": 6.58673872923693e-07,
13028
+ "loss": 0.831,
13029
+ "step": 1860
13030
+ },
13031
+ {
13032
+ "epoch": 0.9514314928425358,
13033
+ "grad_norm": 3.1351966857910156,
13034
+ "learning_rate": 6.450523999032177e-07,
13035
+ "loss": 1.0224,
13036
+ "step": 1861
13037
+ },
13038
+ {
13039
+ "epoch": 0.9519427402862985,
13040
+ "grad_norm": 2.93005633354187,
13041
+ "learning_rate": 6.315723350172775e-07,
13042
+ "loss": 0.8546,
13043
+ "step": 1862
13044
+ },
13045
+ {
13046
+ "epoch": 0.9524539877300614,
13047
+ "grad_norm": 3.181835651397705,
13048
+ "learning_rate": 6.182337168879671e-07,
13049
+ "loss": 0.903,
13050
+ "step": 1863
13051
+ },
13052
+ {
13053
+ "epoch": 0.9529652351738241,
13054
+ "grad_norm": 3.401155948638916,
13055
+ "learning_rate": 6.050365837320992e-07,
13056
+ "loss": 0.9181,
13057
+ "step": 1864
13058
+ },
13059
+ {
13060
+ "epoch": 0.9534764826175869,
13061
+ "grad_norm": 3.0578250885009766,
13062
+ "learning_rate": 5.919809733611171e-07,
13063
+ "loss": 0.8978,
13064
+ "step": 1865
13065
+ },
13066
+ {
13067
+ "epoch": 0.9539877300613497,
13068
+ "grad_norm": 3.142280101776123,
13069
+ "learning_rate": 5.790669231809875e-07,
13070
+ "loss": 0.9351,
13071
+ "step": 1866
13072
+ },
13073
+ {
13074
+ "epoch": 0.9544989775051125,
13075
+ "grad_norm": 3.1850273609161377,
13076
+ "learning_rate": 5.66294470192097e-07,
13077
+ "loss": 0.9621,
13078
+ "step": 1867
13079
+ },
13080
+ {
13081
+ "epoch": 0.9550102249488752,
13082
+ "grad_norm": 3.274036169052124,
13083
+ "learning_rate": 5.536636509891225e-07,
13084
+ "loss": 0.8428,
13085
+ "step": 1868
13086
+ },
13087
+ {
13088
+ "epoch": 0.9555214723926381,
13089
+ "grad_norm": 3.323293924331665,
13090
+ "learning_rate": 5.411745017609493e-07,
13091
+ "loss": 0.9623,
13092
+ "step": 1869
13093
+ },
13094
+ {
13095
+ "epoch": 0.9560327198364008,
13096
+ "grad_norm": 3.296380043029785,
13097
+ "learning_rate": 5.288270582905708e-07,
13098
+ "loss": 0.86,
13099
+ "step": 1870
13100
+ },
13101
+ {
13102
+ "epoch": 0.9565439672801636,
13103
+ "grad_norm": 3.7464077472686768,
13104
+ "learning_rate": 5.166213559549549e-07,
13105
+ "loss": 0.8665,
13106
+ "step": 1871
13107
+ },
13108
+ {
13109
+ "epoch": 0.9570552147239264,
13110
+ "grad_norm": 3.5171990394592285,
13111
+ "learning_rate": 5.045574297249833e-07,
13112
+ "loss": 0.9063,
13113
+ "step": 1872
13114
+ },
13115
+ {
13116
+ "epoch": 0.9575664621676891,
13117
+ "grad_norm": 3.684037685394287,
13118
+ "learning_rate": 4.926353141653184e-07,
13119
+ "loss": 0.9079,
13120
+ "step": 1873
13121
+ },
13122
+ {
13123
+ "epoch": 0.9580777096114519,
13124
+ "grad_norm": 3.424814462661743,
13125
+ "learning_rate": 4.80855043434325e-07,
13126
+ "loss": 0.8508,
13127
+ "step": 1874
13128
+ },
13129
+ {
13130
+ "epoch": 0.9585889570552147,
13131
+ "grad_norm": 3.3258156776428223,
13132
+ "learning_rate": 4.692166512839491e-07,
13133
+ "loss": 0.8615,
13134
+ "step": 1875
13135
+ },
13136
+ {
13137
+ "epoch": 0.9591002044989775,
13138
+ "grad_norm": 3.7672901153564453,
13139
+ "learning_rate": 4.577201710596612e-07,
13140
+ "loss": 0.9087,
13141
+ "step": 1876
13142
+ },
13143
+ {
13144
+ "epoch": 0.9596114519427403,
13145
+ "grad_norm": 3.638936996459961,
13146
+ "learning_rate": 4.4636563570031873e-07,
13147
+ "loss": 0.921,
13148
+ "step": 1877
13149
+ },
13150
+ {
13151
+ "epoch": 0.9601226993865031,
13152
+ "grad_norm": 3.7481777667999268,
13153
+ "learning_rate": 4.3515307773809855e-07,
13154
+ "loss": 0.8998,
13155
+ "step": 1878
13156
+ },
13157
+ {
13158
+ "epoch": 0.9606339468302658,
13159
+ "grad_norm": 4.0111212730407715,
13160
+ "learning_rate": 4.240825292983808e-07,
13161
+ "loss": 0.8379,
13162
+ "step": 1879
13163
+ },
13164
+ {
13165
+ "epoch": 0.9611451942740287,
13166
+ "grad_norm": 3.8063735961914062,
13167
+ "learning_rate": 4.131540220996877e-07,
13168
+ "loss": 1.0216,
13169
+ "step": 1880
13170
+ },
13171
+ {
13172
+ "epoch": 0.9616564417177914,
13173
+ "grad_norm": 3.818110942840576,
13174
+ "learning_rate": 4.023675874535671e-07,
13175
+ "loss": 0.9189,
13176
+ "step": 1881
13177
+ },
13178
+ {
13179
+ "epoch": 0.9621676891615542,
13180
+ "grad_norm": 3.307727098464966,
13181
+ "learning_rate": 3.917232562645035e-07,
13182
+ "loss": 0.7723,
13183
+ "step": 1882
13184
+ },
13185
+ {
13186
+ "epoch": 0.962678936605317,
13187
+ "grad_norm": 3.7070207595825195,
13188
+ "learning_rate": 3.812210590298515e-07,
13189
+ "loss": 0.8208,
13190
+ "step": 1883
13191
+ },
13192
+ {
13193
+ "epoch": 0.9631901840490797,
13194
+ "grad_norm": 3.999943971633911,
13195
+ "learning_rate": 3.7086102583972494e-07,
13196
+ "loss": 0.9011,
13197
+ "step": 1884
13198
+ },
13199
+ {
13200
+ "epoch": 0.9637014314928425,
13201
+ "grad_norm": 3.9105939865112305,
13202
+ "learning_rate": 3.6064318637693e-07,
13203
+ "loss": 0.9319,
13204
+ "step": 1885
13205
+ },
13206
+ {
13207
+ "epoch": 0.9642126789366053,
13208
+ "grad_norm": 3.4953696727752686,
13209
+ "learning_rate": 3.505675699168487e-07,
13210
+ "loss": 0.836,
13211
+ "step": 1886
13212
+ },
13213
+ {
13214
+ "epoch": 0.9647239263803681,
13215
+ "grad_norm": 4.094799995422363,
13216
+ "learning_rate": 3.406342053274003e-07,
13217
+ "loss": 0.7909,
13218
+ "step": 1887
13219
+ },
13220
+ {
13221
+ "epoch": 0.9652351738241309,
13222
+ "grad_norm": 3.863297700881958,
13223
+ "learning_rate": 3.3084312106892446e-07,
13224
+ "loss": 0.8657,
13225
+ "step": 1888
13226
+ },
13227
+ {
13228
+ "epoch": 0.9657464212678937,
13229
+ "grad_norm": 4.07185173034668,
13230
+ "learning_rate": 3.211943451941035e-07,
13231
+ "loss": 0.8233,
13232
+ "step": 1889
13233
+ },
13234
+ {
13235
+ "epoch": 0.9662576687116564,
13236
+ "grad_norm": 4.35256814956665,
13237
+ "learning_rate": 3.1168790534789605e-07,
13238
+ "loss": 0.9713,
13239
+ "step": 1890
13240
+ },
13241
+ {
13242
+ "epoch": 0.9667689161554193,
13243
+ "grad_norm": 3.9577109813690186,
13244
+ "learning_rate": 3.023238287674479e-07,
13245
+ "loss": 0.8875,
13246
+ "step": 1891
13247
+ },
13248
+ {
13249
+ "epoch": 0.967280163599182,
13250
+ "grad_norm": 4.198614597320557,
13251
+ "learning_rate": 2.9310214228202013e-07,
13252
+ "loss": 0.7918,
13253
+ "step": 1892
13254
+ },
13255
+ {
13256
+ "epoch": 0.9677914110429447,
13257
+ "grad_norm": 4.020992279052734,
13258
+ "learning_rate": 2.840228723129001e-07,
13259
+ "loss": 0.7969,
13260
+ "step": 1893
13261
+ },
13262
+ {
13263
+ "epoch": 0.9683026584867076,
13264
+ "grad_norm": 4.857772350311279,
13265
+ "learning_rate": 2.750860448733461e-07,
13266
+ "loss": 0.7989,
13267
+ "step": 1894
13268
+ },
13269
+ {
13270
+ "epoch": 0.9688139059304703,
13271
+ "grad_norm": 4.296475887298584,
13272
+ "learning_rate": 2.662916855684816e-07,
13273
+ "loss": 0.7896,
13274
+ "step": 1895
13275
+ },
13276
+ {
13277
+ "epoch": 0.9693251533742331,
13278
+ "grad_norm": 4.845669269561768,
13279
+ "learning_rate": 2.5763981959526786e-07,
13280
+ "loss": 0.9473,
13281
+ "step": 1896
13282
+ },
13283
+ {
13284
+ "epoch": 0.9698364008179959,
13285
+ "grad_norm": 5.276214599609375,
13286
+ "learning_rate": 2.4913047174237035e-07,
13287
+ "loss": 0.8389,
13288
+ "step": 1897
13289
+ },
13290
+ {
13291
+ "epoch": 0.9703476482617587,
13292
+ "grad_norm": 5.026844501495361,
13293
+ "learning_rate": 2.407636663901591e-07,
13294
+ "loss": 0.6934,
13295
+ "step": 1898
13296
+ },
13297
+ {
13298
+ "epoch": 0.9708588957055214,
13299
+ "grad_norm": 5.138829231262207,
13300
+ "learning_rate": 2.3253942751056968e-07,
13301
+ "loss": 0.5662,
13302
+ "step": 1899
13303
+ },
13304
+ {
13305
+ "epoch": 0.9713701431492843,
13306
+ "grad_norm": 8.76285457611084,
13307
+ "learning_rate": 2.2445777866709205e-07,
13308
+ "loss": 0.8929,
13309
+ "step": 1900
13310
+ },
13311
+ {
13312
+ "epoch": 0.971881390593047,
13313
+ "grad_norm": 2.1116087436676025,
13314
+ "learning_rate": 2.1651874301465979e-07,
13315
+ "loss": 0.8929,
13316
+ "step": 1901
13317
+ },
13318
+ {
13319
+ "epoch": 0.9723926380368099,
13320
+ "grad_norm": 2.1844208240509033,
13321
+ "learning_rate": 2.087223432996166e-07,
13322
+ "loss": 0.8937,
13323
+ "step": 1902
13324
+ },
13325
+ {
13326
+ "epoch": 0.9729038854805726,
13327
+ "grad_norm": 2.286012887954712,
13328
+ "learning_rate": 2.0106860185962194e-07,
13329
+ "loss": 0.8114,
13330
+ "step": 1903
13331
+ },
13332
+ {
13333
+ "epoch": 0.9734151329243353,
13334
+ "grad_norm": 2.505859613418579,
13335
+ "learning_rate": 1.935575406236123e-07,
13336
+ "loss": 0.9787,
13337
+ "step": 1904
13338
+ },
13339
+ {
13340
+ "epoch": 0.9739263803680982,
13341
+ "grad_norm": 2.708432674407959,
13342
+ "learning_rate": 1.861891811117178e-07,
13343
+ "loss": 0.9853,
13344
+ "step": 1905
13345
+ },
13346
+ {
13347
+ "epoch": 0.9744376278118609,
13348
+ "grad_norm": 2.945042610168457,
13349
+ "learning_rate": 1.7896354443521778e-07,
13350
+ "loss": 0.9986,
13351
+ "step": 1906
13352
+ },
13353
+ {
13354
+ "epoch": 0.9749488752556237,
13355
+ "grad_norm": 2.5554847717285156,
13356
+ "learning_rate": 1.7188065129647435e-07,
13357
+ "loss": 0.9626,
13358
+ "step": 1907
13359
+ },
13360
+ {
13361
+ "epoch": 0.9754601226993865,
13362
+ "grad_norm": 2.838167190551758,
13363
+ "learning_rate": 1.6494052198886555e-07,
13364
+ "loss": 0.9229,
13365
+ "step": 1908
13366
+ },
13367
+ {
13368
+ "epoch": 0.9759713701431493,
13369
+ "grad_norm": 2.6479218006134033,
13370
+ "learning_rate": 1.5814317639673005e-07,
13371
+ "loss": 0.8479,
13372
+ "step": 1909
13373
+ },
13374
+ {
13375
+ "epoch": 0.976482617586912,
13376
+ "grad_norm": 3.0106728076934814,
13377
+ "learning_rate": 1.5148863399532254e-07,
13378
+ "loss": 0.8836,
13379
+ "step": 1910
13380
+ },
13381
+ {
13382
+ "epoch": 0.9769938650306749,
13383
+ "grad_norm": 3.02433705329895,
13384
+ "learning_rate": 1.4497691385074175e-07,
13385
+ "loss": 0.9304,
13386
+ "step": 1911
13387
+ },
13388
+ {
13389
+ "epoch": 0.9775051124744376,
13390
+ "grad_norm": 3.085106134414673,
13391
+ "learning_rate": 1.3860803461989146e-07,
13392
+ "loss": 0.8676,
13393
+ "step": 1912
  }
  ],
  "logging_steps": 1,
@@ -11737,7 +13410,7 @@
  "attributes": {}
  }
  },
- "total_flos": 6.396906433916436e+17,
+ "total_flos": 7.310480002095514e+17,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null