besimray commited on
Commit
0fe8826
·
verified ·
1 Parent(s): 0d8496e

Training in progress, step 310, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5cd7e82b02fdb1fc211ba8cfc78c940346dac0e57cf3b9fa129b21a5999b2993
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8043d411e54c3df1ebca409696347b8e9a385dabbcffd552e858ae07b989cbf
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:401b7cff7a753623117fa84b165624b04510bb6a1ade9be169f1a8be1a9413f1
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a8fc14c40c058613fe56fd7b9d5397288001f7e3fc116344b43062f632ff8265
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04fa9e95bd7c12dd6aadf40d93322ffe6416ce0beade0e6a39e0e8c9d50f3d43
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:314e02efe1b3db288f9e9c8ca583964838ea24b3581864ee7ae9f30730c12eb3
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae6873a9dc297bddde060ab581654fd818f7b1bcaf7ce11d1538178040ff306d
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7c463750d3752994b4ac9a5cae2d97850942d858f0130dada5f5e81e74ee4daf
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.019237518310547,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-300",
4
- "epoch": 0.013559015615466317,
5
  "eval_steps": 5,
6
- "global_step": 300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2595,6 +2595,92 @@
2595
  "eval_samples_per_second": 52.809,
2596
  "eval_steps_per_second": 26.407,
2597
  "step": 300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2598
  }
2599
  ],
2600
  "logging_steps": 1,
@@ -2623,7 +2709,7 @@
2623
  "attributes": {}
2624
  }
2625
  },
2626
- "total_flos": 3152019456000.0,
2627
  "train_batch_size": 2,
2628
  "trial_name": null,
2629
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.018967628479004,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-310",
4
+ "epoch": 0.014010982802648527,
5
  "eval_steps": 5,
6
+ "global_step": 310,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2595
  "eval_samples_per_second": 52.809,
2596
  "eval_steps_per_second": 26.407,
2597
  "step": 300
2598
+ },
2599
+ {
2600
+ "epoch": 0.013604212334184539,
2601
+ "grad_norm": 0.49278682470321655,
2602
+ "learning_rate": 7.093318872950665e-05,
2603
+ "loss": 44.0319,
2604
+ "step": 301
2605
+ },
2606
+ {
2607
+ "epoch": 0.01364940905290276,
2608
+ "grad_norm": 0.5009450316429138,
2609
+ "learning_rate": 7.032033094488095e-05,
2610
+ "loss": 44.0988,
2611
+ "step": 302
2612
+ },
2613
+ {
2614
+ "epoch": 0.013694605771620981,
2615
+ "grad_norm": 0.4270615577697754,
2616
+ "learning_rate": 6.97086931752028e-05,
2617
+ "loss": 44.1025,
2618
+ "step": 303
2619
+ },
2620
+ {
2621
+ "epoch": 0.013739802490339202,
2622
+ "grad_norm": 0.49744102358818054,
2623
+ "learning_rate": 6.909830056250527e-05,
2624
+ "loss": 44.0652,
2625
+ "step": 304
2626
+ },
2627
+ {
2628
+ "epoch": 0.013784999209057422,
2629
+ "grad_norm": 0.48600587248802185,
2630
+ "learning_rate": 6.848917819763793e-05,
2631
+ "loss": 44.1292,
2632
+ "step": 305
2633
+ },
2634
+ {
2635
+ "epoch": 0.013784999209057422,
2636
+ "eval_loss": 11.01909351348877,
2637
+ "eval_runtime": 176.1259,
2638
+ "eval_samples_per_second": 52.9,
2639
+ "eval_steps_per_second": 26.453,
2640
+ "step": 305
2641
+ },
2642
+ {
2643
+ "epoch": 0.013830195927775643,
2644
+ "grad_norm": 0.4116569459438324,
2645
+ "learning_rate": 6.788135111923545e-05,
2646
+ "loss": 44.0897,
2647
+ "step": 306
2648
+ },
2649
+ {
2650
+ "epoch": 0.013875392646493864,
2651
+ "grad_norm": 0.4364916682243347,
2652
+ "learning_rate": 6.72748443126883e-05,
2653
+ "loss": 44.1195,
2654
+ "step": 307
2655
+ },
2656
+ {
2657
+ "epoch": 0.013920589365212085,
2658
+ "grad_norm": 0.5589216351509094,
2659
+ "learning_rate": 6.666968270911584e-05,
2660
+ "loss": 44.0911,
2661
+ "step": 308
2662
+ },
2663
+ {
2664
+ "epoch": 0.013965786083930306,
2665
+ "grad_norm": 0.5414496064186096,
2666
+ "learning_rate": 6.606589118434126e-05,
2667
+ "loss": 44.1532,
2668
+ "step": 309
2669
+ },
2670
+ {
2671
+ "epoch": 0.014010982802648527,
2672
+ "grad_norm": 0.4488687515258789,
2673
+ "learning_rate": 6.546349455786926e-05,
2674
+ "loss": 44.0637,
2675
+ "step": 310
2676
+ },
2677
+ {
2678
+ "epoch": 0.014010982802648527,
2679
+ "eval_loss": 11.018967628479004,
2680
+ "eval_runtime": 176.4018,
2681
+ "eval_samples_per_second": 52.817,
2682
+ "eval_steps_per_second": 26.411,
2683
+ "step": 310
2684
  }
2685
  ],
2686
  "logging_steps": 1,
 
2709
  "attributes": {}
2710
  }
2711
  },
2712
+ "total_flos": 3257086771200.0,
2713
  "train_batch_size": 2,
2714
  "trial_name": null,
2715
  "trial_params": null