Training in progress, step 1980, checkpoint
Browse files
last-checkpoint/adapter_model.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 232169792
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:c55df82de4fb0d3db68714ad442d1d38d6669acfa32b0ffab80c2d7a3ba49721
|
3 |
size 232169792
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 117446154
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0132de8f62e0022bee7ed5782fad90b3e6cee029fe97753323c522d57dad933
|
3 |
size 117446154
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d4df863dffbf5bc9bd29858a1d6f78b8ba5687345e29875e39b5ed6c18141865
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch": 2.
|
5 |
"eval_steps": 500,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -35575,6 +35575,78 @@
|
|
35575 |
"rewards/margins": 0.32102257013320923,
|
35576 |
"rewards/rejected": -0.3932061493396759,
|
35577 |
"step": 1976
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
35578 |
}
|
35579 |
],
|
35580 |
"logging_steps": 1,
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.8650016951067916,
|
5 |
"eval_steps": 500,
|
6 |
+
"global_step": 1980,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
35575 |
"rewards/margins": 0.32102257013320923,
|
35576 |
"rewards/rejected": -0.3932061493396759,
|
35577 |
"step": 1976
|
35578 |
+
},
|
35579 |
+
{
|
35580 |
+
"epoch": 2.860662221719968,
|
35581 |
+
"grad_norm": 0.5800076127052307,
|
35582 |
+
"learning_rate": 2.641115862252713e-07,
|
35583 |
+
"log_odds_chosen": 3.0793051719665527,
|
35584 |
+
"log_odds_ratio": -0.4249449074268341,
|
35585 |
+
"logits/chosen": -1.7792483568191528,
|
35586 |
+
"logits/rejected": -1.466111183166504,
|
35587 |
+
"logps/chosen": -0.7235685586929321,
|
35588 |
+
"logps/rejected": -3.3135576248168945,
|
35589 |
+
"loss": 0.9257,
|
35590 |
+
"nll_loss": 0.8831884860992432,
|
35591 |
+
"rewards/accuracies": 0.765625,
|
35592 |
+
"rewards/chosen": -0.07235686480998993,
|
35593 |
+
"rewards/margins": 0.25899893045425415,
|
35594 |
+
"rewards/rejected": -0.3313557803630829,
|
35595 |
+
"step": 1977
|
35596 |
+
},
|
35597 |
+
{
|
35598 |
+
"epoch": 2.8621087128489093,
|
35599 |
+
"grad_norm": 0.6505364179611206,
|
35600 |
+
"learning_rate": 2.586473775788856e-07,
|
35601 |
+
"log_odds_chosen": 1.9093005657196045,
|
35602 |
+
"log_odds_ratio": -0.4956282377243042,
|
35603 |
+
"logits/chosen": -1.8862462043762207,
|
35604 |
+
"logits/rejected": -1.6489293575286865,
|
35605 |
+
"logps/chosen": -0.8198176622390747,
|
35606 |
+
"logps/rejected": -2.364813804626465,
|
35607 |
+
"loss": 1.0495,
|
35608 |
+
"nll_loss": 0.9999848008155823,
|
35609 |
+
"rewards/accuracies": 0.71875,
|
35610 |
+
"rewards/chosen": -0.08198177814483643,
|
35611 |
+
"rewards/margins": 0.1544996052980423,
|
35612 |
+
"rewards/rejected": -0.23648138344287872,
|
35613 |
+
"step": 1978
|
35614 |
+
},
|
35615 |
+
{
|
35616 |
+
"epoch": 2.8635552039778505,
|
35617 |
+
"grad_norm": 1.2835837602615356,
|
35618 |
+
"learning_rate": 2.5323999197676973e-07,
|
35619 |
+
"log_odds_chosen": 1.9394054412841797,
|
35620 |
+
"log_odds_ratio": -0.4927099645137787,
|
35621 |
+
"logits/chosen": -1.8224869966506958,
|
35622 |
+
"logits/rejected": -1.6263892650604248,
|
35623 |
+
"logps/chosen": -0.8234947919845581,
|
35624 |
+
"logps/rejected": -2.4225282669067383,
|
35625 |
+
"loss": 1.0989,
|
35626 |
+
"nll_loss": 1.0496528148651123,
|
35627 |
+
"rewards/accuracies": 0.65625,
|
35628 |
+
"rewards/chosen": -0.08234947919845581,
|
35629 |
+
"rewards/margins": 0.15990334749221802,
|
35630 |
+
"rewards/rejected": -0.24225284159183502,
|
35631 |
+
"step": 1979
|
35632 |
+
},
|
35633 |
+
{
|
35634 |
+
"epoch": 2.8650016951067916,
|
35635 |
+
"grad_norm": 0.6098787784576416,
|
35636 |
+
"learning_rate": 2.478894418379674e-07,
|
35637 |
+
"log_odds_chosen": 3.68991756439209,
|
35638 |
+
"log_odds_ratio": -0.4527060389518738,
|
35639 |
+
"logits/chosen": -1.8065725564956665,
|
35640 |
+
"logits/rejected": -1.5102980136871338,
|
35641 |
+
"logps/chosen": -0.761375367641449,
|
35642 |
+
"logps/rejected": -3.9927573204040527,
|
35643 |
+
"loss": 0.9802,
|
35644 |
+
"nll_loss": 0.9349774122238159,
|
35645 |
+
"rewards/accuracies": 0.75,
|
35646 |
+
"rewards/chosen": -0.07613754272460938,
|
35647 |
+
"rewards/margins": 0.32313817739486694,
|
35648 |
+
"rewards/rejected": -0.3992757499217987,
|
35649 |
+
"step": 1980
|
35650 |
}
|
35651 |
],
|
35652 |
"logging_steps": 1,
|