|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.9968652037617555,
  "eval_steps": 500,
  "global_step": 159,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.01,
      "learning_rate": 3.125e-08,
      "logits/chosen": -2.6344056129455566,
      "logits/rejected": -2.5906338691711426,
      "logps/chosen": -158.64126586914062,
      "logps/pi_response": -86.09246826171875,
      "logps/ref_response": -86.09246826171875,
      "logps/rejected": -148.42047119140625,
      "loss": 0.6931,
      "rewards/accuracies": 0.0,
      "rewards/chosen": 0.0,
      "rewards/margins": 0.0,
      "rewards/rejected": 0.0,
      "step": 1
    },
    {
      "epoch": 0.06,
      "learning_rate": 3.1249999999999997e-07,
      "logits/chosen": -2.6920104026794434,
      "logits/rejected": -2.6961071491241455,
      "logps/chosen": -233.56158447265625,
      "logps/pi_response": -119.84620666503906,
      "logps/ref_response": -118.89913940429688,
      "logps/rejected": -252.62852478027344,
      "loss": 0.692,
      "rewards/accuracies": 0.4444444477558136,
      "rewards/chosen": -0.01218863669782877,
      "rewards/margins": 0.0017457219073548913,
      "rewards/rejected": -0.013934357091784477,
      "step": 10
    },
    {
      "epoch": 0.13,
      "learning_rate": 4.990353313429303e-07,
      "logits/chosen": -2.6988842487335205,
      "logits/rejected": -2.670401096343994,
      "logps/chosen": -249.9190216064453,
      "logps/pi_response": -146.00906372070312,
      "logps/ref_response": -131.12376403808594,
      "logps/rejected": -276.4181823730469,
      "loss": 0.6796,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.17617936432361603,
      "rewards/margins": 0.050668977200984955,
      "rewards/rejected": -0.2268483191728592,
      "step": 20
    },
    {
      "epoch": 0.19,
      "learning_rate": 4.882681251368548e-07,
      "logits/chosen": -2.5072598457336426,
      "logits/rejected": -2.4469008445739746,
      "logps/chosen": -241.61929321289062,
      "logps/pi_response": -130.57382202148438,
      "logps/ref_response": -113.8835220336914,
      "logps/rejected": -247.4310760498047,
      "loss": 0.6664,
      "rewards/accuracies": 0.5687500238418579,
      "rewards/chosen": -0.15306231379508972,
      "rewards/margins": 0.11979613453149796,
      "rewards/rejected": -0.2728584408760071,
      "step": 30
    },
    {
      "epoch": 0.25,
      "learning_rate": 4.6604720940421207e-07,
      "logits/chosen": -2.322709798812866,
      "logits/rejected": -2.300786256790161,
      "logps/chosen": -245.057373046875,
      "logps/pi_response": -155.58706665039062,
      "logps/ref_response": -110.53662109375,
      "logps/rejected": -320.01849365234375,
      "loss": 0.6421,
      "rewards/accuracies": 0.6937500238418579,
      "rewards/chosen": -0.4277011454105377,
      "rewards/margins": 0.18936362862586975,
      "rewards/rejected": -0.6170647740364075,
      "step": 40
    },
    {
      "epoch": 0.31,
      "learning_rate": 4.3344075855595097e-07,
      "logits/chosen": -2.2339208126068115,
      "logits/rejected": -2.2038655281066895,
      "logps/chosen": -281.0859375,
      "logps/pi_response": -177.91244506835938,
      "logps/ref_response": -120.1254653930664,
      "logps/rejected": -306.99078369140625,
      "loss": 0.6311,
      "rewards/accuracies": 0.643750011920929,
      "rewards/chosen": -0.552875280380249,
      "rewards/margins": 0.14258158206939697,
      "rewards/rejected": -0.695456862449646,
      "step": 50
    },
    {
      "epoch": 0.38,
      "learning_rate": 3.920161866827889e-07,
      "logits/chosen": -2.136162281036377,
      "logits/rejected": -2.102271556854248,
      "logps/chosen": -285.7301330566406,
      "logps/pi_response": -197.15200805664062,
      "logps/ref_response": -107.5267333984375,
      "logps/rejected": -330.02581787109375,
      "loss": 0.632,
      "rewards/accuracies": 0.637499988079071,
      "rewards/chosen": -0.7207737565040588,
      "rewards/margins": 0.32419511675834656,
      "rewards/rejected": -1.044968843460083,
      "step": 60
    },
    {
      "epoch": 0.44,
      "learning_rate": 3.4376480090239047e-07,
      "logits/chosen": -2.116626024246216,
      "logits/rejected": -2.0622029304504395,
      "logps/chosen": -290.5029602050781,
      "logps/pi_response": -204.41468811035156,
      "logps/ref_response": -112.31196594238281,
      "logps/rejected": -308.43865966796875,
      "loss": 0.6112,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.7230253219604492,
      "rewards/margins": 0.3711085617542267,
      "rewards/rejected": -1.0941338539123535,
      "step": 70
    },
    {
      "epoch": 0.5,
      "learning_rate": 2.910060778827554e-07,
      "logits/chosen": -2.1222305297851562,
      "logits/rejected": -2.0609357357025146,
      "logps/chosen": -270.329345703125,
      "logps/pi_response": -193.92774963378906,
      "logps/ref_response": -99.59819793701172,
      "logps/rejected": -328.5818176269531,
      "loss": 0.6234,
      "rewards/accuracies": 0.6875,
      "rewards/chosen": -0.7498321533203125,
      "rewards/margins": 0.32974669337272644,
      "rewards/rejected": -1.0795788764953613,
      "step": 80
    },
    {
      "epoch": 0.56,
      "learning_rate": 2.3627616503391812e-07,
      "logits/chosen": -2.1789047718048096,
      "logits/rejected": -2.1352646350860596,
      "logps/chosen": -281.3520202636719,
      "logps/pi_response": -184.90760803222656,
      "logps/ref_response": -100.86174011230469,
      "logps/rejected": -323.51318359375,
      "loss": 0.6141,
      "rewards/accuracies": 0.675000011920929,
      "rewards/chosen": -0.7342792749404907,
      "rewards/margins": 0.34946689009666443,
      "rewards/rejected": -1.0837461948394775,
      "step": 90
    },
    {
      "epoch": 0.63,
      "learning_rate": 1.8220596619089573e-07,
      "logits/chosen": -2.075347661972046,
      "logits/rejected": -2.0635223388671875,
      "logps/chosen": -291.8459167480469,
      "logps/pi_response": -209.01785278320312,
      "logps/ref_response": -112.6884765625,
      "logps/rejected": -376.8053894042969,
      "loss": 0.5877,
      "rewards/accuracies": 0.7124999761581421,
      "rewards/chosen": -0.8351489305496216,
      "rewards/margins": 0.3953370749950409,
      "rewards/rejected": -1.2304860353469849,
      "step": 100
    },
    {
      "epoch": 0.69,
      "learning_rate": 1.3139467229135998e-07,
      "logits/chosen": -2.1212668418884277,
      "logits/rejected": -2.1371259689331055,
      "logps/chosen": -294.1075439453125,
      "logps/pi_response": -189.68203735351562,
      "logps/ref_response": -100.60249328613281,
      "logps/rejected": -319.2774963378906,
      "loss": 0.6056,
      "rewards/accuracies": 0.606249988079071,
      "rewards/chosen": -0.7740401029586792,
      "rewards/margins": 0.3121982216835022,
      "rewards/rejected": -1.086238145828247,
      "step": 110
    },
    {
      "epoch": 0.75,
      "learning_rate": 8.628481651367875e-08,
      "logits/chosen": -2.127702236175537,
      "logits/rejected": -2.052624225616455,
      "logps/chosen": -323.14556884765625,
      "logps/pi_response": -227.27230834960938,
      "logps/ref_response": -130.39340209960938,
      "logps/rejected": -348.0731506347656,
      "loss": 0.5969,
      "rewards/accuracies": 0.6187499761581421,
      "rewards/chosen": -0.8538217544555664,
      "rewards/margins": 0.24452969431877136,
      "rewards/rejected": -1.0983514785766602,
      "step": 120
    },
    {
      "epoch": 0.82,
      "learning_rate": 4.904486005914027e-08,
      "logits/chosen": -2.16937518119812,
      "logits/rejected": -2.149695634841919,
      "logps/chosen": -293.9873046875,
      "logps/pi_response": -207.9149932861328,
      "logps/ref_response": -113.8158950805664,
      "logps/rejected": -352.2449035644531,
      "loss": 0.5837,
      "rewards/accuracies": 0.6625000238418579,
      "rewards/chosen": -0.8029237985610962,
      "rewards/margins": 0.38218849897384644,
      "rewards/rejected": -1.1851122379302979,
      "step": 130
    },
    {
      "epoch": 0.88,
      "learning_rate": 2.1464952759020856e-08,
      "logits/chosen": -2.166949987411499,
      "logits/rejected": -2.0973129272460938,
      "logps/chosen": -321.9905090332031,
      "logps/pi_response": -238.9150390625,
      "logps/ref_response": -119.49796295166016,
      "logps/rejected": -366.47198486328125,
      "loss": 0.5936,
      "rewards/accuracies": 0.71875,
      "rewards/chosen": -0.9360674023628235,
      "rewards/margins": 0.3974238932132721,
      "rewards/rejected": -1.3334912061691284,
      "step": 140
    },
    {
      "epoch": 0.94,
      "learning_rate": 4.8708793644441086e-09,
      "logits/chosen": -2.112703800201416,
      "logits/rejected": -2.0817809104919434,
      "logps/chosen": -298.79486083984375,
      "logps/pi_response": -201.3252716064453,
      "logps/ref_response": -101.72931671142578,
      "logps/rejected": -328.84320068359375,
      "loss": 0.5747,
      "rewards/accuracies": 0.699999988079071,
      "rewards/chosen": -0.8357221484184265,
      "rewards/margins": 0.33179959654808044,
      "rewards/rejected": -1.1675217151641846,
      "step": 150
    },
    {
      "epoch": 1.0,
      "step": 159,
      "total_flos": 0.0,
      "train_loss": 0.6203871613028664,
      "train_runtime": 4552.526,
      "train_samples_per_second": 4.476,
      "train_steps_per_second": 0.035
    }
  ],
  "logging_steps": 10,
  "max_steps": 159,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 1,
  "save_steps": 100,
  "total_flos": 0.0,
  "train_batch_size": 8,
  "trial_name": null,
  "trial_params": null
}
|
|