besimray committed on
Commit a6a61fd · verified · 1 Parent(s): a07cccc

Training in progress, step 340, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4bd871ffaaf005b4ae82e3fe0574e188d290bd56611e88b2ee15dcc1170f10b7
+oid sha256:a8791fae23023cf2106e0168a68fe7cfc76cf5009e3729f4af35acf290bf1f97
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b3f145bed0430746d11b64162710df80efd4565776390d51f03bf26c95c602c2
+oid sha256:715153c301e34bc9e1ba77f1996ccb2e9a6de47d2fddaab7f57690744eaa6ccb
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:26b546f9e36af2b502a1f657b47e744aea84f04f078ad1d8590bd9010e8547d4
+oid sha256:1e92355acea1d637dfee0848ce97e0a35db7f2918dda84a5212f2ddf67f03a2c
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:319ff6bc537233144a9a1321d603147241bbe6a5f63a69d9ed8b2711e764b26e
+oid sha256:fc3b597e507c818a1b809c10b26354c2c68ff2bc801d315b5aadc95eb0081237
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.018866539001465,
-  "best_model_checkpoint": "miner_id_24/checkpoint-330",
-  "epoch": 0.01491491717701295,
+  "best_metric": 11.018733024597168,
+  "best_model_checkpoint": "miner_id_24/checkpoint-340",
+  "epoch": 0.01536688436419516,
   "eval_steps": 5,
-  "global_step": 330,
+  "global_step": 340,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -2853,6 +2853,92 @@
       "eval_samples_per_second": 52.813,
       "eval_steps_per_second": 26.409,
       "step": 330
+    },
+    {
+      "epoch": 0.01496011389573117,
+      "grad_norm": 0.503404438495636,
+      "learning_rate": 5.317869004397544e-05,
+      "loss": 44.0551,
+      "step": 331
+    },
+    {
+      "epoch": 0.015005310614449392,
+      "grad_norm": 0.5667140483856201,
+      "learning_rate": 5.261313375270014e-05,
+      "loss": 44.1005,
+      "step": 332
+    },
+    {
+      "epoch": 0.015050507333167613,
+      "grad_norm": 0.4343127906322479,
+      "learning_rate": 5.2049525349894625e-05,
+      "loss": 44.0367,
+      "step": 333
+    },
+    {
+      "epoch": 0.015095704051885834,
+      "grad_norm": 0.4030550420284271,
+      "learning_rate": 5.148788800329278e-05,
+      "loss": 44.0094,
+      "step": 334
+    },
+    {
+      "epoch": 0.015140900770604053,
+      "grad_norm": 0.7541276812553406,
+      "learning_rate": 5.092824479960625e-05,
+      "loss": 44.0686,
+      "step": 335
+    },
+    {
+      "epoch": 0.015140900770604053,
+      "eval_loss": 11.018802642822266,
+      "eval_runtime": 176.1322,
+      "eval_samples_per_second": 52.898,
+      "eval_steps_per_second": 26.452,
+      "step": 335
+    },
+    {
+      "epoch": 0.015186097489322275,
+      "grad_norm": 0.4742172360420227,
+      "learning_rate": 5.0370618743575026e-05,
+      "loss": 44.0855,
+      "step": 336
+    },
+    {
+      "epoch": 0.015231294208040496,
+      "grad_norm": 0.4134741723537445,
+      "learning_rate": 4.981503275702227e-05,
+      "loss": 44.0928,
+      "step": 337
+    },
+    {
+      "epoch": 0.015276490926758717,
+      "grad_norm": 0.6316869258880615,
+      "learning_rate": 4.92615096779118e-05,
+      "loss": 44.0649,
+      "step": 338
+    },
+    {
+      "epoch": 0.015321687645476938,
+      "grad_norm": 0.4112119674682617,
+      "learning_rate": 4.87100722594094e-05,
+      "loss": 44.0769,
+      "step": 339
+    },
+    {
+      "epoch": 0.01536688436419516,
+      "grad_norm": 0.4423971474170685,
+      "learning_rate": 4.8160743168947496e-05,
+      "loss": 44.059,
+      "step": 340
+    },
+    {
+      "epoch": 0.01536688436419516,
+      "eval_loss": 11.018733024597168,
+      "eval_runtime": 176.2897,
+      "eval_samples_per_second": 52.85,
+      "eval_steps_per_second": 26.428,
+      "step": 340
     }
   ],
   "logging_steps": 1,
@@ -2881,7 +2967,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 3467221401600.0,
+  "total_flos": 3572288716800.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null