besimray committed · Commit 0f11f02 · verified · 1 Parent(s): a8dfe25

Training in progress, step 320, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8043d411e54c3df1ebca409696347b8e9a385dabbcffd552e858ae07b989cbf
+ oid sha256:e47c5e37aeb9ba7c9d351cd6d3f2d34e7a16fbc5e8e214fa50df6016a912f81e
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:a8fc14c40c058613fe56fd7b9d5397288001f7e3fc116344b43062f632ff8265
+ oid sha256:2c8676f7ad327b1a667c04cbff92a64401964795cdbde06f9afb0439e5da7b66
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:314e02efe1b3db288f9e9c8ca583964838ea24b3581864ee7ae9f30730c12eb3
+ oid sha256:f6d096910032d49eddb6029582f1caa936c833fef6de52cfa0342e08b8dcfcd2
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7c463750d3752994b4ac9a5cae2d97850942d858f0130dada5f5e81e74ee4daf
+ oid sha256:f2c4b5b0b5a3cd85dde69bc18b7d6ada534f4917a21cce1323408aed5ed9b4ef
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
  {
- "best_metric": 11.018967628479004,
- "best_model_checkpoint": "miner_id_24/checkpoint-310",
- "epoch": 0.014010982802648527,
+ "best_metric": 11.018913269042969,
+ "best_model_checkpoint": "miner_id_24/checkpoint-320",
+ "epoch": 0.014462949989830737,
  "eval_steps": 5,
- "global_step": 310,
+ "global_step": 320,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
@@ -2681,6 +2681,92 @@
  "eval_samples_per_second": 52.817,
  "eval_steps_per_second": 26.411,
  "step": 310
+ },
+ {
+ "epoch": 0.014056179521366749,
+ "grad_norm": 0.5137606859207153,
+ "learning_rate": 6.486251759186572e-05,
+ "loss": 44.1158,
+ "step": 311
+ },
+ {
+ "epoch": 0.01410137624008497,
+ "grad_norm": 0.5155542492866516,
+ "learning_rate": 6.426298499013994e-05,
+ "loss": 44.1199,
+ "step": 312
+ },
+ {
+ "epoch": 0.014146572958803191,
+ "grad_norm": 0.37395790219306946,
+ "learning_rate": 6.366492139712886e-05,
+ "loss": 44.0457,
+ "step": 313
+ },
+ {
+ "epoch": 0.014191769677521412,
+ "grad_norm": 0.6116747260093689,
+ "learning_rate": 6.306835139688438e-05,
+ "loss": 44.1012,
+ "step": 314
+ },
+ {
+ "epoch": 0.014236966396239633,
+ "grad_norm": 0.5333120822906494,
+ "learning_rate": 6.24732995120626e-05,
+ "loss": 44.1035,
+ "step": 315
+ },
+ {
+ "epoch": 0.014236966396239633,
+ "eval_loss": 11.018932342529297,
+ "eval_runtime": 176.1972,
+ "eval_samples_per_second": 52.878,
+ "eval_steps_per_second": 26.442,
+ "step": 315
+ },
+ {
+ "epoch": 0.014282163114957854,
+ "grad_norm": 0.43927499651908875,
+ "learning_rate": 6.187979020291583e-05,
+ "loss": 44.0191,
+ "step": 316
+ },
+ {
+ "epoch": 0.014327359833676076,
+ "grad_norm": 0.4511764347553253,
+ "learning_rate": 6.12878478662872e-05,
+ "loss": 44.036,
+ "step": 317
+ },
+ {
+ "epoch": 0.014372556552394297,
+ "grad_norm": 0.4678284823894501,
+ "learning_rate": 6.069749683460765e-05,
+ "loss": 44.1023,
+ "step": 318
+ },
+ {
+ "epoch": 0.014417753271112518,
+ "grad_norm": 0.4449803829193115,
+ "learning_rate": 6.010876137489584e-05,
+ "loss": 44.0835,
+ "step": 319
+ },
+ {
+ "epoch": 0.014462949989830737,
+ "grad_norm": 0.42860502004623413,
+ "learning_rate": 5.952166568776062e-05,
+ "loss": 44.0725,
+ "step": 320
+ },
+ {
+ "epoch": 0.014462949989830737,
+ "eval_loss": 11.018913269042969,
+ "eval_runtime": 176.3627,
+ "eval_samples_per_second": 52.829,
+ "eval_steps_per_second": 26.417,
+ "step": 320
  }
  ],
  "logging_steps": 1,
@@ -2709,7 +2795,7 @@
  "attributes": {}
  }
  },
- "total_flos": 3257086771200.0,
+ "total_flos": 3362154086400.0,
  "train_batch_size": 2,
  "trial_name": null,
  "trial_params": null