iamnguyen commited on
Commit
af64517
·
verified ·
1 Parent(s): 6cde272

Training in progress, step 1980, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:72de3d2cd81e75f4ecc70f5acd430ff5b3122913c0c27640b64e85b0a53935be
3
  size 232169792
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c55df82de4fb0d3db68714ad442d1d38d6669acfa32b0ffab80c2d7a3ba49721
3
  size 232169792
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9e31b34b8d1af3114dd3b2085b21005ecf4d64b9404a12f78a6f1bda698b0b3
3
  size 117446154
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0132de8f62e0022bee7ed5782fad90b3e6cee029fe97753323c522d57dad933
3
  size 117446154
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2b0fa43290a459cc7417471e7cdda585e9a0e5edf2c2c7ef8247d02c7ef7b67
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d4df863dffbf5bc9bd29858a1d6f78b8ba5687345e29875e39b5ed6c18141865
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.859215730591027,
5
  "eval_steps": 500,
6
- "global_step": 1976,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -35575,6 +35575,78 @@
35575
  "rewards/margins": 0.32102257013320923,
35576
  "rewards/rejected": -0.3932061493396759,
35577
  "step": 1976
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
35578
  }
35579
  ],
35580
  "logging_steps": 1,
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.8650016951067916,
5
  "eval_steps": 500,
6
+ "global_step": 1980,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
35575
  "rewards/margins": 0.32102257013320923,
35576
  "rewards/rejected": -0.3932061493396759,
35577
  "step": 1976
35578
+ },
35579
+ {
35580
+ "epoch": 2.860662221719968,
35581
+ "grad_norm": 0.5800076127052307,
35582
+ "learning_rate": 2.641115862252713e-07,
35583
+ "log_odds_chosen": 3.0793051719665527,
35584
+ "log_odds_ratio": -0.4249449074268341,
35585
+ "logits/chosen": -1.7792483568191528,
35586
+ "logits/rejected": -1.466111183166504,
35587
+ "logps/chosen": -0.7235685586929321,
35588
+ "logps/rejected": -3.3135576248168945,
35589
+ "loss": 0.9257,
35590
+ "nll_loss": 0.8831884860992432,
35591
+ "rewards/accuracies": 0.765625,
35592
+ "rewards/chosen": -0.07235686480998993,
35593
+ "rewards/margins": 0.25899893045425415,
35594
+ "rewards/rejected": -0.3313557803630829,
35595
+ "step": 1977
35596
+ },
35597
+ {
35598
+ "epoch": 2.8621087128489093,
35599
+ "grad_norm": 0.6505364179611206,
35600
+ "learning_rate": 2.586473775788856e-07,
35601
+ "log_odds_chosen": 1.9093005657196045,
35602
+ "log_odds_ratio": -0.4956282377243042,
35603
+ "logits/chosen": -1.8862462043762207,
35604
+ "logits/rejected": -1.6489293575286865,
35605
+ "logps/chosen": -0.8198176622390747,
35606
+ "logps/rejected": -2.364813804626465,
35607
+ "loss": 1.0495,
35608
+ "nll_loss": 0.9999848008155823,
35609
+ "rewards/accuracies": 0.71875,
35610
+ "rewards/chosen": -0.08198177814483643,
35611
+ "rewards/margins": 0.1544996052980423,
35612
+ "rewards/rejected": -0.23648138344287872,
35613
+ "step": 1978
35614
+ },
35615
+ {
35616
+ "epoch": 2.8635552039778505,
35617
+ "grad_norm": 1.2835837602615356,
35618
+ "learning_rate": 2.5323999197676973e-07,
35619
+ "log_odds_chosen": 1.9394054412841797,
35620
+ "log_odds_ratio": -0.4927099645137787,
35621
+ "logits/chosen": -1.8224869966506958,
35622
+ "logits/rejected": -1.6263892650604248,
35623
+ "logps/chosen": -0.8234947919845581,
35624
+ "logps/rejected": -2.4225282669067383,
35625
+ "loss": 1.0989,
35626
+ "nll_loss": 1.0496528148651123,
35627
+ "rewards/accuracies": 0.65625,
35628
+ "rewards/chosen": -0.08234947919845581,
35629
+ "rewards/margins": 0.15990334749221802,
35630
+ "rewards/rejected": -0.24225284159183502,
35631
+ "step": 1979
35632
+ },
35633
+ {
35634
+ "epoch": 2.8650016951067916,
35635
+ "grad_norm": 0.6098787784576416,
35636
+ "learning_rate": 2.478894418379674e-07,
35637
+ "log_odds_chosen": 3.68991756439209,
35638
+ "log_odds_ratio": -0.4527060389518738,
35639
+ "logits/chosen": -1.8065725564956665,
35640
+ "logits/rejected": -1.5102980136871338,
35641
+ "logps/chosen": -0.761375367641449,
35642
+ "logps/rejected": -3.9927573204040527,
35643
+ "loss": 0.9802,
35644
+ "nll_loss": 0.9349774122238159,
35645
+ "rewards/accuracies": 0.75,
35646
+ "rewards/chosen": -0.07613754272460938,
35647
+ "rewards/margins": 0.32313817739486694,
35648
+ "rewards/rejected": -0.3992757499217987,
35649
+ "step": 1980
35650
  }
35651
  ],
35652
  "logging_steps": 1,