DuongTrongChi commited on
Commit
fb4b6d2
·
verified ·
1 Parent(s): 4354e12

Training in progress, step 364, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dacaef364ad77ac1803a72f3736525dd206d7c28282d058f9a9246a1805bf1b0
3
  size 100198584
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f08c2fe8d23621408caef4c661f4748b9c92ff95bb32092949404a462f56dda1
3
  size 100198584
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abd3846414a209932cf3f9c1ecaa3a5f1a053beebfa2cd6b4a352bc0450a8909
3
  size 50675604
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5d15efa0709f033ae788595ad9b99e85de1c28c66dfc1689bb433561a7928ba5
3
  size 50675604
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:358cc543a4dfee9cdbd11d603288fa82292225e6222d3a6078f1df3f3c96f685
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fd5dac93345174330ac6b4ac1ca4e52d3e5368cc463298f98057ba0165f0b917
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.99039780521262,
5
  "eval_steps": 500,
6
- "global_step": 361,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2534,6 +2534,27 @@
2534
  "learning_rate": 2.2727272727272728e-06,
2535
  "loss": 0.9949,
2536
  "step": 361
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2537
  }
2538
  ],
2539
  "logging_steps": 1,
@@ -2548,12 +2569,12 @@
2548
  "should_evaluate": false,
2549
  "should_log": false,
2550
  "should_save": true,
2551
- "should_training_stop": false
2552
  },
2553
  "attributes": {}
2554
  }
2555
  },
2556
- "total_flos": 7.504897038633492e+17,
2557
  "train_batch_size": 4,
2558
  "trial_name": null,
2559
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.9986282578875172,
5
  "eval_steps": 500,
6
+ "global_step": 364,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2534
  "learning_rate": 2.2727272727272728e-06,
2535
  "loss": 0.9949,
2536
  "step": 361
2537
+ },
2538
+ {
2539
+ "epoch": 0.9931412894375857,
2540
+ "grad_norm": 0.08098744601011276,
2541
+ "learning_rate": 1.5151515151515152e-06,
2542
+ "loss": 1.0165,
2543
+ "step": 362
2544
+ },
2545
+ {
2546
+ "epoch": 0.9958847736625515,
2547
+ "grad_norm": 0.0881686583161354,
2548
+ "learning_rate": 7.575757575757576e-07,
2549
+ "loss": 1.0239,
2550
+ "step": 363
2551
+ },
2552
+ {
2553
+ "epoch": 0.9986282578875172,
2554
+ "grad_norm": 0.08588221669197083,
2555
+ "learning_rate": 0.0,
2556
+ "loss": 1.0815,
2557
+ "step": 364
2558
  }
2559
  ],
2560
  "logging_steps": 1,
 
2569
  "should_evaluate": false,
2570
  "should_log": false,
2571
  "should_save": true,
2572
+ "should_training_stop": true
2573
  },
2574
  "attributes": {}
2575
  }
2576
  },
2577
+ "total_flos": 7.563690834619392e+17,
2578
  "train_batch_size": 4,
2579
  "trial_name": null,
2580
  "trial_params": null