|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 100.0, |
|
"global_step": 239, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.130502223968506, |
|
"logits/oppo_generated": -3.1088104248046875, |
|
"logits/oppo_real": -3.130502223968506, |
|
"logits/real": -3.1088104248046875, |
|
"logps/generated": -99.40917205810547, |
|
"logps/oppo_gen": -99.40917205810547, |
|
"logps/oppo_real": -459.3097229003906, |
|
"logps/real": -459.3097229003906, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -3.0933988094329834, |
|
"logits/oppo_generated": -2.919645309448242, |
|
"logits/oppo_real": -3.0933988094329834, |
|
"logits/real": -2.919645309448242, |
|
"logps/generated": -103.65153503417969, |
|
"logps/oppo_gen": -103.65153503417969, |
|
"logps/oppo_real": -392.1358642578125, |
|
"logps/real": -392.1358642578125, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.6572537422180176, |
|
"logits/oppo_generated": -2.8074941635131836, |
|
"logits/oppo_real": -2.6572537422180176, |
|
"logits/real": -2.8074941635131836, |
|
"logps/generated": -72.88986206054688, |
|
"logps/oppo_gen": -72.88986206054688, |
|
"logps/oppo_real": -291.916748046875, |
|
"logps/real": -291.916748046875, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.8966193199157715, |
|
"logits/oppo_generated": -2.768460273742676, |
|
"logits/oppo_real": -2.8966193199157715, |
|
"logits/real": -2.768460273742676, |
|
"logps/generated": -64.05287170410156, |
|
"logps/oppo_gen": -64.05287170410156, |
|
"logps/oppo_real": -376.8367919921875, |
|
"logps/real": -376.8367919921875, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"grad_norm": 0.0, |
|
"learning_rate": 0.0, |
|
"logits/generated": -2.889317512512207, |
|
"logits/oppo_generated": -2.708950996398926, |
|
"logits/oppo_real": -2.889317512512207, |
|
"logits/real": -2.708950996398926, |
|
"logps/generated": -48.29164123535156, |
|
"logps/oppo_gen": -48.29164123535156, |
|
"logps/oppo_real": -173.0751953125, |
|
"logps/real": -173.0751953125, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 21.44563623958737, |
|
"learning_rate": 1.6666666666666667e-08, |
|
"logits/generated": -2.957958698272705, |
|
"logits/oppo_generated": -2.749436378479004, |
|
"logits/oppo_real": -2.957958698272705, |
|
"logits/real": -2.749436378479004, |
|
"logps/generated": -48.84138488769531, |
|
"logps/oppo_gen": -48.84138488769531, |
|
"logps/oppo_real": -139.2998046875, |
|
"logps/real": -139.2998046875, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 24.181090932615223, |
|
"learning_rate": 3.3333333333333334e-08, |
|
"logits/generated": -3.1195316314697266, |
|
"logits/oppo_generated": -2.9545342922210693, |
|
"logits/oppo_real": -3.1195316314697266, |
|
"logits/real": -2.9545342922210693, |
|
"logps/generated": -163.2059783935547, |
|
"logps/oppo_gen": -163.2059783935547, |
|
"logps/oppo_real": -432.88226318359375, |
|
"logps/real": -432.88226318359375, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"grad_norm": 23.92123585650089, |
|
"learning_rate": 5e-08, |
|
"logits/generated": -2.910332441329956, |
|
"logits/oppo_generated": -2.9416637420654297, |
|
"logits/oppo_real": -2.910332441329956, |
|
"logits/real": -2.9416637420654297, |
|
"logps/generated": -69.29386901855469, |
|
"logps/oppo_gen": -69.29386901855469, |
|
"logps/oppo_real": -311.59619140625, |
|
"logps/real": -311.59619140625, |
|
"loss": 2.0, |
|
"loss/gen": 2.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 0.0, |
|
"rewards/generated": 0.0, |
|
"rewards/margins": 0.0, |
|
"rewards/real": 0.0, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 18.72729908334438, |
|
"learning_rate": 6.666666666666667e-08, |
|
"logits/generated": -2.409820318222046, |
|
"logits/oppo_generated": -2.294548273086548, |
|
"logits/oppo_real": -2.409976005554199, |
|
"logits/real": -2.29451322555542, |
|
"logps/generated": -82.21556091308594, |
|
"logps/oppo_gen": -82.20011138916016, |
|
"logps/oppo_real": -381.1852111816406, |
|
"logps/real": -381.1634521484375, |
|
"loss": 2.0, |
|
"loss/gen": 1.9998455047607422, |
|
"loss/real": 7.733702659606934e-06, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.015454292297363281, |
|
"rewards/margins": 0.03723621368408203, |
|
"rewards/real": 0.02178192138671875, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"grad_norm": 17.202277028374397, |
|
"learning_rate": 8.333333333333333e-08, |
|
"logits/generated": -2.962700366973877, |
|
"logits/oppo_generated": -2.9239017963409424, |
|
"logits/oppo_real": -2.963313579559326, |
|
"logits/real": -2.923351526260376, |
|
"logps/generated": -93.16413879394531, |
|
"logps/oppo_gen": -93.09856414794922, |
|
"logps/oppo_real": -233.10401916503906, |
|
"logps/real": -233.077880859375, |
|
"loss": 1.9995, |
|
"loss/gen": 1.9993443489074707, |
|
"loss/real": 5.08427619934082e-05, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.06557583808898926, |
|
"rewards/margins": 0.09174036979675293, |
|
"rewards/real": 0.026164531707763672, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 17.401866312895148, |
|
"learning_rate": 1e-07, |
|
"logits/generated": -2.8563976287841797, |
|
"logits/oppo_generated": -2.837850570678711, |
|
"logits/oppo_real": -2.857771396636963, |
|
"logits/real": -2.836169958114624, |
|
"logps/generated": -59.6667594909668, |
|
"logps/oppo_gen": -59.46293640136719, |
|
"logps/oppo_real": -142.69805908203125, |
|
"logps/real": -142.69808959960938, |
|
"loss": 1.998, |
|
"loss/gen": 1.9979617595672607, |
|
"loss/real": 0.0006099119782447815, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -0.20382428169250488, |
|
"rewards/margins": 0.20379090309143066, |
|
"rewards/real": -3.337860107421875e-05, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 17.22730830301668, |
|
"learning_rate": 1.1666666666666667e-07, |
|
"logits/generated": -2.875734329223633, |
|
"logits/oppo_generated": -2.7672762870788574, |
|
"logits/oppo_real": -2.8780808448791504, |
|
"logits/real": -2.7644264698028564, |
|
"logps/generated": -70.91142272949219, |
|
"logps/oppo_gen": -70.58644104003906, |
|
"logps/oppo_real": -343.4704284667969, |
|
"logps/real": -343.3141174316406, |
|
"loss": 1.9968, |
|
"loss/gen": 1.9967502355575562, |
|
"loss/real": 0.00020247697830200195, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.32497692108154297, |
|
"rewards/margins": 0.48128724098205566, |
|
"rewards/real": 0.1563103199005127, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"grad_norm": 15.819281409300507, |
|
"learning_rate": 1.3333333333333334e-07, |
|
"logits/generated": -2.815218925476074, |
|
"logits/oppo_generated": -2.8374581336975098, |
|
"logits/oppo_real": -2.822021961212158, |
|
"logits/real": -2.830238103866577, |
|
"logps/generated": -107.48092651367188, |
|
"logps/oppo_gen": -106.73956298828125, |
|
"logps/oppo_real": -280.41741943359375, |
|
"logps/real": -280.51971435546875, |
|
"loss": 1.9918, |
|
"loss/gen": 1.9925864934921265, |
|
"loss/real": 0.0017677471041679382, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -0.7413442134857178, |
|
"rewards/margins": 0.6390435695648193, |
|
"rewards/real": -0.10230064392089844, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 16.905273409293585, |
|
"learning_rate": 1.5e-07, |
|
"logits/generated": -2.7615389823913574, |
|
"logits/oppo_generated": -2.8255615234375, |
|
"logits/oppo_real": -2.771684169769287, |
|
"logits/real": -2.815335273742676, |
|
"logps/generated": -87.6278076171875, |
|
"logps/oppo_gen": -85.86231994628906, |
|
"logps/oppo_real": -289.01318359375, |
|
"logps/real": -288.6315612792969, |
|
"loss": 1.9878, |
|
"loss/gen": 1.9823451042175293, |
|
"loss/real": 0.001967109739780426, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -1.7654941082000732, |
|
"rewards/margins": 2.147136688232422, |
|
"rewards/real": 0.38164258003234863, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"grad_norm": 16.47449070515203, |
|
"learning_rate": 1.6666666666666665e-07, |
|
"logits/generated": -3.1344149112701416, |
|
"logits/oppo_generated": -2.7394165992736816, |
|
"logits/oppo_real": -3.1553921699523926, |
|
"logits/real": -2.717817544937134, |
|
"logps/generated": -77.48875427246094, |
|
"logps/oppo_gen": -74.47514343261719, |
|
"logps/oppo_real": -366.370361328125, |
|
"logps/real": -366.13739013671875, |
|
"loss": 1.9771, |
|
"loss/gen": 1.9698638916015625, |
|
"loss/real": 0.002904340624809265, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.013608455657959, |
|
"rewards/margins": 3.2465546131134033, |
|
"rewards/real": 0.23294615745544434, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 16.775265896504816, |
|
"learning_rate": 1.833333333333333e-07, |
|
"logits/generated": -2.108006000518799, |
|
"logits/oppo_generated": -2.1468427181243896, |
|
"logits/oppo_real": -2.142064094543457, |
|
"logits/real": -2.117852210998535, |
|
"logps/generated": -81.89228820800781, |
|
"logps/oppo_gen": -78.08332824707031, |
|
"logps/oppo_real": -437.152587890625, |
|
"logps/real": -437.1942443847656, |
|
"loss": 1.9669, |
|
"loss/gen": 1.9619104862213135, |
|
"loss/real": 0.004711121320724487, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -3.8089609146118164, |
|
"rewards/margins": 3.76729679107666, |
|
"rewards/real": -0.04166412353515625, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"grad_norm": 15.954718888453357, |
|
"learning_rate": 2e-07, |
|
"logits/generated": -2.9154043197631836, |
|
"logits/oppo_generated": -2.902646064758301, |
|
"logits/oppo_real": -2.953411817550659, |
|
"logits/real": -2.8594038486480713, |
|
"logps/generated": -77.6299819946289, |
|
"logps/oppo_gen": -72.53976440429688, |
|
"logps/oppo_real": -310.7004089355469, |
|
"logps/real": -310.427734375, |
|
"loss": 1.961, |
|
"loss/gen": 1.9490978717803955, |
|
"loss/real": 0.0060840025544166565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -5.090216636657715, |
|
"rewards/margins": 5.362889289855957, |
|
"rewards/real": 0.2726726531982422, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 17.536179990025712, |
|
"learning_rate": 2.1666666666666667e-07, |
|
"logits/generated": -2.9157583713531494, |
|
"logits/oppo_generated": -2.947140693664551, |
|
"logits/oppo_real": -2.9634807109832764, |
|
"logits/real": -2.8917417526245117, |
|
"logps/generated": -83.36225891113281, |
|
"logps/oppo_gen": -74.80116271972656, |
|
"logps/oppo_real": -309.46124267578125, |
|
"logps/real": -309.94171142578125, |
|
"loss": 1.9385, |
|
"loss/gen": 1.9143891334533691, |
|
"loss/real": 0.010849758982658386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -8.56109619140625, |
|
"rewards/margins": 8.080650329589844, |
|
"rewards/real": -0.48044562339782715, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 18.055822491099047, |
|
"learning_rate": 2.3333333333333333e-07, |
|
"logits/generated": -2.398920774459839, |
|
"logits/oppo_generated": -2.6668543815612793, |
|
"logits/oppo_real": -2.47564697265625, |
|
"logits/real": -2.5944724082946777, |
|
"logps/generated": -77.91998291015625, |
|
"logps/oppo_gen": -67.190673828125, |
|
"logps/oppo_real": -285.60797119140625, |
|
"logps/real": -287.39215087890625, |
|
"loss": 1.9138, |
|
"loss/gen": 1.892707109451294, |
|
"loss/real": 0.023035116493701935, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -10.729303359985352, |
|
"rewards/margins": 8.945160865783691, |
|
"rewards/real": -1.7841424942016602, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"grad_norm": 16.8409722750193, |
|
"learning_rate": 2.5e-07, |
|
"logits/generated": -3.0429744720458984, |
|
"logits/oppo_generated": -2.7376956939697266, |
|
"logits/oppo_real": -3.1153059005737305, |
|
"logits/real": -2.66239595413208, |
|
"logps/generated": -108.49114990234375, |
|
"logps/oppo_gen": -93.65745544433594, |
|
"logps/oppo_real": -173.968994140625, |
|
"logps/real": -176.7364044189453, |
|
"loss": 1.9, |
|
"loss/gen": 1.8516631126403809, |
|
"loss/real": 0.030972033739089966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -14.833693504333496, |
|
"rewards/margins": 12.066278457641602, |
|
"rewards/real": -2.7674155235290527, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 17.0265708165071, |
|
"learning_rate": 2.6666666666666667e-07, |
|
"logits/generated": -2.807745933532715, |
|
"logits/oppo_generated": -2.6699156761169434, |
|
"logits/oppo_real": -2.8930060863494873, |
|
"logits/real": -2.5801193714141846, |
|
"logps/generated": -62.11570739746094, |
|
"logps/oppo_gen": -50.189754486083984, |
|
"logps/oppo_real": -197.0562286376953, |
|
"logps/real": -198.63250732421875, |
|
"loss": 1.8804, |
|
"loss/gen": 1.8807404041290283, |
|
"loss/real": 0.03226040303707123, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -11.925955772399902, |
|
"rewards/margins": 10.349677085876465, |
|
"rewards/real": -1.5762791633605957, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"grad_norm": 15.942840997675052, |
|
"learning_rate": 2.833333333333333e-07, |
|
"logits/generated": -2.883525848388672, |
|
"logits/oppo_generated": -2.8113152980804443, |
|
"logits/oppo_real": -2.997610330581665, |
|
"logits/real": -2.701076030731201, |
|
"logps/generated": -77.23695373535156, |
|
"logps/oppo_gen": -59.91856384277344, |
|
"logps/oppo_real": -175.6089324951172, |
|
"logps/real": -180.22738647460938, |
|
"loss": 1.8703, |
|
"loss/gen": 1.8268163204193115, |
|
"loss/real": 0.05111686885356903, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -17.318382263183594, |
|
"rewards/margins": 12.699928283691406, |
|
"rewards/real": -4.618453025817871, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 18.193819711198465, |
|
"learning_rate": 3e-07, |
|
"logits/generated": -2.71798038482666, |
|
"logits/oppo_generated": -2.712057113647461, |
|
"logits/oppo_real": -2.83805513381958, |
|
"logits/real": -2.592156410217285, |
|
"logps/generated": -108.12179565429688, |
|
"logps/oppo_gen": -84.5518798828125, |
|
"logps/oppo_real": -331.96221923828125, |
|
"logps/real": -337.7485656738281, |
|
"loss": 1.8307, |
|
"loss/gen": 1.76430082321167, |
|
"loss/real": 0.058562956750392914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -23.569915771484375, |
|
"rewards/margins": 17.783584594726562, |
|
"rewards/real": -5.786332130432129, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 16.996427272784786, |
|
"learning_rate": 3.166666666666666e-07, |
|
"logits/generated": -2.2170791625976562, |
|
"logits/oppo_generated": -2.4313888549804688, |
|
"logits/oppo_real": -2.3368191719055176, |
|
"logits/real": -2.3218801021575928, |
|
"logps/generated": -98.60990142822266, |
|
"logps/oppo_gen": -70.7446060180664, |
|
"logps/oppo_real": -186.56976318359375, |
|
"logps/real": -192.2266082763672, |
|
"loss": 1.8198, |
|
"loss/gen": 1.7213470935821533, |
|
"loss/real": 0.05736871063709259, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -27.865299224853516, |
|
"rewards/margins": 22.208459854125977, |
|
"rewards/real": -5.6568403244018555, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"grad_norm": 16.67077026711208, |
|
"learning_rate": 3.333333333333333e-07, |
|
"logits/generated": -2.77504825592041, |
|
"logits/oppo_generated": -2.8222999572753906, |
|
"logits/oppo_real": -2.956730842590332, |
|
"logits/real": -2.662292957305908, |
|
"logps/generated": -79.92071533203125, |
|
"logps/oppo_gen": -55.461936950683594, |
|
"logps/oppo_real": -125.98847198486328, |
|
"logps/real": -132.84556579589844, |
|
"loss": 1.7945, |
|
"loss/gen": 1.7554123401641846, |
|
"loss/real": 0.07477347552776337, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -24.458776473999023, |
|
"rewards/margins": 17.601680755615234, |
|
"rewards/real": -6.857094764709473, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 16.22325115917978, |
|
"learning_rate": 3.5e-07, |
|
"logits/generated": -2.4703292846679688, |
|
"logits/oppo_generated": -2.9076757431030273, |
|
"logits/oppo_real": -2.661245822906494, |
|
"logits/real": -2.7052745819091797, |
|
"logps/generated": -103.34319305419922, |
|
"logps/oppo_gen": -71.46342468261719, |
|
"logps/oppo_real": -293.69677734375, |
|
"logps/real": -296.63507080078125, |
|
"loss": 1.7719, |
|
"loss/gen": 1.6812022924423218, |
|
"loss/real": 0.05883955955505371, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -31.879772186279297, |
|
"rewards/margins": 28.941482543945312, |
|
"rewards/real": -2.938288688659668, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"grad_norm": 15.655657911240558, |
|
"learning_rate": 3.666666666666666e-07, |
|
"logits/generated": -2.6560888290405273, |
|
"logits/oppo_generated": -3.018123149871826, |
|
"logits/oppo_real": -2.837935447692871, |
|
"logits/real": -2.8067123889923096, |
|
"logps/generated": -81.89646911621094, |
|
"logps/oppo_gen": -51.06623458862305, |
|
"logps/oppo_real": -151.72972106933594, |
|
"logps/real": -171.45449829101562, |
|
"loss": 1.7534, |
|
"loss/gen": 1.6916977167129517, |
|
"loss/real": 0.19724780321121216, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -30.83022689819336, |
|
"rewards/margins": 11.105447769165039, |
|
"rewards/real": -19.724781036376953, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 15.655657911240558, |
|
"learning_rate": 3.666666666666666e-07, |
|
"logits/generated": -2.387822151184082, |
|
"logits/oppo_generated": -2.7700376510620117, |
|
"logits/oppo_real": -2.6328747272491455, |
|
"logits/real": -2.5694613456726074, |
|
"logps/generated": -121.05097961425781, |
|
"logps/oppo_gen": -72.09120178222656, |
|
"logps/oppo_real": -411.427978515625, |
|
"logps/real": -412.50714111328125, |
|
"loss": 1.6877, |
|
"loss/gen": 1.5104023218154907, |
|
"loss/real": 0.0984681025147438, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -48.95977020263672, |
|
"rewards/margins": 47.880558013916016, |
|
"rewards/real": -1.0792131423950195, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"grad_norm": 16.840291816557006, |
|
"learning_rate": 3.8333333333333335e-07, |
|
"logits/generated": -2.6876704692840576, |
|
"logits/oppo_generated": -2.91198468208313, |
|
"logits/oppo_real": -2.9211230278015137, |
|
"logits/real": -2.680572509765625, |
|
"logps/generated": -127.4861831665039, |
|
"logps/oppo_gen": -82.21741485595703, |
|
"logps/oppo_real": -301.3589172363281, |
|
"logps/real": -309.1620178222656, |
|
"loss": 1.7013, |
|
"loss/gen": 1.5473122596740723, |
|
"loss/real": 0.10803677141666412, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -45.268768310546875, |
|
"rewards/margins": 37.46567153930664, |
|
"rewards/real": -7.8031005859375, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 17.95870813154182, |
|
"learning_rate": 4e-07, |
|
"logits/generated": -2.7156505584716797, |
|
"logits/oppo_generated": -2.4022648334503174, |
|
"logits/oppo_real": -2.97650146484375, |
|
"logits/real": -2.1997687816619873, |
|
"logps/generated": -136.54647827148438, |
|
"logps/oppo_gen": -99.30915832519531, |
|
"logps/oppo_real": -226.3162841796875, |
|
"logps/real": -240.60678100585938, |
|
"loss": 1.6691, |
|
"loss/gen": 1.627626895904541, |
|
"loss/real": 0.14702296257019043, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -37.23731231689453, |
|
"rewards/margins": 22.946819305419922, |
|
"rewards/real": -14.29049301147461, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 18.000802763945956, |
|
"learning_rate": 4.1666666666666667e-07, |
|
"logits/generated": -2.7124738693237305, |
|
"logits/oppo_generated": -2.854034900665283, |
|
"logits/oppo_real": -2.9424033164978027, |
|
"logits/real": -2.666820526123047, |
|
"logps/generated": -94.56930541992188, |
|
"logps/oppo_gen": -54.3837890625, |
|
"logps/oppo_real": -252.91123962402344, |
|
"logps/real": -263.7140197753906, |
|
"loss": 1.635, |
|
"loss/gen": 1.5981448888778687, |
|
"loss/real": 0.11872847378253937, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -40.185516357421875, |
|
"rewards/margins": 29.382728576660156, |
|
"rewards/real": -10.802785873413086, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"grad_norm": 17.376117039182688, |
|
"learning_rate": 4.3333333333333335e-07, |
|
"logits/generated": -2.646808385848999, |
|
"logits/oppo_generated": -2.9263906478881836, |
|
"logits/oppo_real": -2.9535346031188965, |
|
"logits/real": -2.6604020595550537, |
|
"logps/generated": -133.86170959472656, |
|
"logps/oppo_gen": -78.93435668945312, |
|
"logps/oppo_real": -298.2490234375, |
|
"logps/real": -312.76690673828125, |
|
"loss": 1.629, |
|
"loss/gen": 1.4507265090942383, |
|
"loss/real": 0.1622442901134491, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -54.927345275878906, |
|
"rewards/margins": 40.409461975097656, |
|
"rewards/real": -14.517885208129883, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 19.314468826625482, |
|
"learning_rate": 4.5e-07, |
|
"logits/generated": -2.7972543239593506, |
|
"logits/oppo_generated": -2.9521539211273193, |
|
"logits/oppo_real": -3.0699048042297363, |
|
"logits/real": -2.6584794521331787, |
|
"logps/generated": -189.65911865234375, |
|
"logps/oppo_gen": -136.80690002441406, |
|
"logps/oppo_real": -344.64990234375, |
|
"logps/real": -362.10601806640625, |
|
"loss": 1.5829, |
|
"loss/gen": 1.4714778661727905, |
|
"loss/real": 0.20715071260929108, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -52.85221862792969, |
|
"rewards/margins": 35.396095275878906, |
|
"rewards/real": -17.45612144470215, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"grad_norm": 19.659759256745826, |
|
"learning_rate": 4.6666666666666666e-07, |
|
"logits/generated": -2.6684279441833496, |
|
"logits/oppo_generated": -2.8447458744049072, |
|
"logits/oppo_real": -2.998192548751831, |
|
"logits/real": -2.5552549362182617, |
|
"logps/generated": -146.35748291015625, |
|
"logps/oppo_gen": -79.24800109863281, |
|
"logps/oppo_real": -401.9757385253906, |
|
"logps/real": -426.55157470703125, |
|
"loss": 1.5299, |
|
"loss/gen": 1.3289053440093994, |
|
"loss/real": 0.2510119676589966, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -67.10946655273438, |
|
"rewards/margins": 42.533626556396484, |
|
"rewards/real": -24.575847625732422, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 21.872934472794725, |
|
"learning_rate": 4.833333333333333e-07, |
|
"logits/generated": -2.586422920227051, |
|
"logits/oppo_generated": -2.942030906677246, |
|
"logits/oppo_real": -2.9536867141723633, |
|
"logits/real": -2.602694511413574, |
|
"logps/generated": -140.24705505371094, |
|
"logps/oppo_gen": -62.21235656738281, |
|
"logps/oppo_real": -296.8402404785156, |
|
"logps/real": -320.66766357421875, |
|
"loss": 1.4528, |
|
"loss/gen": 1.2196528911590576, |
|
"loss/real": 0.259676456451416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -78.03470611572266, |
|
"rewards/margins": 54.207305908203125, |
|
"rewards/real": -23.8273983001709, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 22.019553078421858, |
|
"learning_rate": 5e-07, |
|
"logits/generated": -2.2861862182617188, |
|
"logits/oppo_generated": -2.792217493057251, |
|
"logits/oppo_real": -2.680948257446289, |
|
"logits/real": -2.4454588890075684, |
|
"logps/generated": -128.45945739746094, |
|
"logps/oppo_gen": -49.044715881347656, |
|
"logps/oppo_real": -183.3726348876953, |
|
"logps/real": -205.66845703125, |
|
"loss": 1.4374, |
|
"loss/gen": 1.2058525085449219, |
|
"loss/real": 0.26081162691116333, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -79.41474914550781, |
|
"rewards/margins": 57.11891555786133, |
|
"rewards/real": -22.29583168029785, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"grad_norm": 21.563572776755652, |
|
"learning_rate": 4.996438746438746e-07, |
|
"logits/generated": -2.352074146270752, |
|
"logits/oppo_generated": -2.5968940258026123, |
|
"logits/oppo_real": -2.84472393989563, |
|
"logits/real": -2.1995410919189453, |
|
"logps/generated": -192.30780029296875, |
|
"logps/oppo_gen": -96.46727752685547, |
|
"logps/oppo_real": -441.2087097167969, |
|
"logps/real": -454.900390625, |
|
"loss": 1.3751, |
|
"loss/gen": 1.1064567565917969, |
|
"loss/real": 0.17831739783287048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -95.84051513671875, |
|
"rewards/margins": 82.14884948730469, |
|
"rewards/real": -13.691666603088379, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 27.510149937966077, |
|
"learning_rate": 4.992877492877492e-07, |
|
"logits/generated": -2.568110466003418, |
|
"logits/oppo_generated": -3.097993850708008, |
|
"logits/oppo_real": -3.161780834197998, |
|
"logits/real": -2.603790283203125, |
|
"logps/generated": -209.0296630859375, |
|
"logps/oppo_gen": -86.33152770996094, |
|
"logps/oppo_real": -374.5130615234375, |
|
"logps/real": -400.4665832519531, |
|
"loss": 1.2577, |
|
"loss/gen": 0.7730186581611633, |
|
"loss/real": 0.2766711115837097, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -122.69813537597656, |
|
"rewards/margins": 96.74461364746094, |
|
"rewards/real": -25.95351791381836, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"grad_norm": 27.09461729076114, |
|
"learning_rate": 4.98931623931624e-07, |
|
"logits/generated": -2.2019739151000977, |
|
"logits/oppo_generated": -2.648486614227295, |
|
"logits/oppo_real": -2.7488012313842773, |
|
"logits/real": -2.1398563385009766, |
|
"logps/generated": -196.87429809570312, |
|
"logps/oppo_gen": -78.30477142333984, |
|
"logps/oppo_real": -363.86407470703125, |
|
"logps/real": -402.26739501953125, |
|
"loss": 1.2019, |
|
"loss/gen": 0.8143048882484436, |
|
"loss/real": 0.3951404094696045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -118.56951904296875, |
|
"rewards/margins": 80.16621398925781, |
|
"rewards/real": -38.403289794921875, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 27.1369816207399, |
|
"learning_rate": 4.985754985754986e-07, |
|
"logits/generated": -2.157773017883301, |
|
"logits/oppo_generated": -2.864193916320801, |
|
"logits/oppo_real": -2.7761850357055664, |
|
"logits/real": -2.3227579593658447, |
|
"logps/generated": -172.39085388183594, |
|
"logps/oppo_gen": -60.6450309753418, |
|
"logps/oppo_real": -320.1565856933594, |
|
"logps/real": -337.9253845214844, |
|
"loss": 1.1863, |
|
"loss/gen": 0.9242483377456665, |
|
"loss/real": 0.19020405411720276, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -111.7458267211914, |
|
"rewards/margins": 93.97701263427734, |
|
"rewards/real": -17.768808364868164, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"grad_norm": 24.71832653121781, |
|
"learning_rate": 4.982193732193732e-07, |
|
"logits/generated": -2.3361663818359375, |
|
"logits/oppo_generated": -2.812058210372925, |
|
"logits/oppo_real": -2.982236862182617, |
|
"logits/real": -2.289778470993042, |
|
"logps/generated": -202.0223388671875, |
|
"logps/oppo_gen": -90.06674194335938, |
|
"logps/oppo_real": -176.9713592529297, |
|
"logps/real": -221.30557250976562, |
|
"loss": 1.1883, |
|
"loss/gen": 0.8804440498352051, |
|
"loss/real": 0.46414560079574585, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -111.95559692382812, |
|
"rewards/margins": 67.62137603759766, |
|
"rewards/real": -44.3342170715332, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 25.356203831684, |
|
"learning_rate": 4.978632478632478e-07, |
|
"logits/generated": -2.2286205291748047, |
|
"logits/oppo_generated": -2.9253015518188477, |
|
"logits/oppo_real": -2.9079301357269287, |
|
"logits/real": -2.3039026260375977, |
|
"logps/generated": -171.630859375, |
|
"logps/oppo_gen": -54.79414367675781, |
|
"logps/oppo_real": -186.92176818847656, |
|
"logps/real": -248.6990966796875, |
|
"loss": 1.1143, |
|
"loss/gen": 0.8316328525543213, |
|
"loss/real": 0.6177734136581421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -116.83671569824219, |
|
"rewards/margins": 55.05937194824219, |
|
"rewards/real": -61.77734375, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 27.712655447875367, |
|
"learning_rate": 4.975071225071225e-07, |
|
"logits/generated": -2.067673683166504, |
|
"logits/oppo_generated": -2.9949498176574707, |
|
"logits/oppo_real": -2.9107003211975098, |
|
"logits/real": -2.3845181465148926, |
|
"logps/generated": -286.51165771484375, |
|
"logps/oppo_gen": -79.9820785522461, |
|
"logps/oppo_real": -404.1100158691406, |
|
"logps/real": -439.1029968261719, |
|
"loss": 1.0441, |
|
"loss/gen": 0.4001755118370056, |
|
"loss/real": 0.3547167181968689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -206.529541015625, |
|
"rewards/margins": 171.53656005859375, |
|
"rewards/real": -34.99298858642578, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"grad_norm": 26.747401031017247, |
|
"learning_rate": 4.971509971509972e-07, |
|
"logits/generated": -1.7710440158843994, |
|
"logits/oppo_generated": -2.4440221786499023, |
|
"logits/oppo_real": -2.3998050689697266, |
|
"logits/real": -1.8367252349853516, |
|
"logps/generated": -312.6555480957031, |
|
"logps/oppo_gen": -93.22187805175781, |
|
"logps/oppo_real": -290.8685302734375, |
|
"logps/real": -330.1634521484375, |
|
"loss": 0.9874, |
|
"loss/gen": 0.40501296520233154, |
|
"loss/real": 0.39968231320381165, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -219.43365478515625, |
|
"rewards/margins": 180.13876342773438, |
|
"rewards/real": -39.29491424560547, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 29.940363291114327, |
|
"learning_rate": 4.967948717948718e-07, |
|
"logits/generated": -2.0964088439941406, |
|
"logits/oppo_generated": -2.9232547283172607, |
|
"logits/oppo_real": -2.7114880084991455, |
|
"logits/real": -2.3123269081115723, |
|
"logps/generated": -256.84454345703125, |
|
"logps/oppo_gen": -64.50846862792969, |
|
"logps/oppo_real": -239.8323974609375, |
|
"logps/real": -305.34417724609375, |
|
"loss": 0.9685, |
|
"loss/gen": 0.41360723972320557, |
|
"loss/real": 0.6562252640724182, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -192.33609008789062, |
|
"rewards/margins": 126.8243179321289, |
|
"rewards/real": -65.51176452636719, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"grad_norm": 26.65060984072996, |
|
"learning_rate": 4.964387464387464e-07, |
|
"logits/generated": -2.263786792755127, |
|
"logits/oppo_generated": -2.741456985473633, |
|
"logits/oppo_real": -2.9938759803771973, |
|
"logits/real": -2.225804328918457, |
|
"logps/generated": -184.13873291015625, |
|
"logps/oppo_gen": -58.174400329589844, |
|
"logps/oppo_real": -258.21685791015625, |
|
"logps/real": -307.77520751953125, |
|
"loss": 0.9712, |
|
"loss/gen": 0.7734701633453369, |
|
"loss/real": 0.5007840394973755, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -125.96434783935547, |
|
"rewards/margins": 76.40599060058594, |
|
"rewards/real": -49.558353424072266, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 28.4577902373904, |
|
"learning_rate": 4.96082621082621e-07, |
|
"logits/generated": -2.171962022781372, |
|
"logits/oppo_generated": -2.814079761505127, |
|
"logits/oppo_real": -2.964923620223999, |
|
"logits/real": -2.236274480819702, |
|
"logps/generated": -279.45623779296875, |
|
"logps/oppo_gen": -78.5189208984375, |
|
"logps/oppo_real": -288.56396484375, |
|
"logps/real": -338.4586181640625, |
|
"loss": 0.8781, |
|
"loss/gen": 0.38681352138519287, |
|
"loss/real": 0.5231560468673706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -200.93731689453125, |
|
"rewards/margins": 151.0426788330078, |
|
"rewards/real": -49.89463806152344, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 28.363362753377597, |
|
"learning_rate": 4.957264957264958e-07, |
|
"logits/generated": -2.1406655311584473, |
|
"logits/oppo_generated": -2.7121076583862305, |
|
"logits/oppo_real": -2.932806968688965, |
|
"logits/real": -2.1540122032165527, |
|
"logps/generated": -254.42098999023438, |
|
"logps/oppo_gen": -72.10917663574219, |
|
"logps/oppo_real": -299.3392333984375, |
|
"logps/real": -366.06121826171875, |
|
"loss": 0.8697, |
|
"loss/gen": 0.38246485590934753, |
|
"loss/real": 0.6711124181747437, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -182.31182861328125, |
|
"rewards/margins": 115.58984375, |
|
"rewards/real": -66.72196960449219, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 29.28652212909496, |
|
"learning_rate": 4.953703703703703e-07, |
|
"logits/generated": -2.362544059753418, |
|
"logits/oppo_generated": -2.814209461212158, |
|
"logits/oppo_real": -3.157527208328247, |
|
"logits/real": -2.4037039279937744, |
|
"logps/generated": -309.79132080078125, |
|
"logps/oppo_gen": -80.24543762207031, |
|
"logps/oppo_real": -294.9969482421875, |
|
"logps/real": -339.64544677734375, |
|
"loss": 0.7412, |
|
"loss/gen": 0.41209107637405396, |
|
"loss/real": 0.4473879337310791, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.5458984375, |
|
"rewards/margins": 184.89743041992188, |
|
"rewards/real": -44.648468017578125, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 37.235111000384265, |
|
"learning_rate": 4.95014245014245e-07, |
|
"logits/generated": -2.274956464767456, |
|
"logits/oppo_generated": -2.9343652725219727, |
|
"logits/oppo_real": -2.7617945671081543, |
|
"logits/real": -2.470684051513672, |
|
"logps/generated": -271.6064758300781, |
|
"logps/oppo_gen": -82.74765014648438, |
|
"logps/oppo_real": -315.32562255859375, |
|
"logps/real": -348.8078308105469, |
|
"loss": 0.8359, |
|
"loss/gen": 0.3331334590911865, |
|
"loss/real": 0.33817416429519653, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -188.85882568359375, |
|
"rewards/margins": 155.37660217285156, |
|
"rewards/real": -33.482208251953125, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 24.169072033670428, |
|
"learning_rate": 4.946581196581196e-07, |
|
"logits/generated": -2.1854918003082275, |
|
"logits/oppo_generated": -2.805569648742676, |
|
"logits/oppo_real": -2.7846250534057617, |
|
"logits/real": -2.2574825286865234, |
|
"logps/generated": -207.7048797607422, |
|
"logps/oppo_gen": -45.456573486328125, |
|
"logps/oppo_real": -161.39598083496094, |
|
"logps/real": -193.92062377929688, |
|
"loss": 0.7238, |
|
"loss/gen": 0.49297964572906494, |
|
"loss/real": 0.37008020281791687, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -162.24830627441406, |
|
"rewards/margins": 129.72366333007812, |
|
"rewards/real": -32.52463150024414, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 28.44118708927029, |
|
"learning_rate": 4.943019943019943e-07, |
|
"logits/generated": -1.998687982559204, |
|
"logits/oppo_generated": -2.7444612979888916, |
|
"logits/oppo_real": -2.7595162391662598, |
|
"logits/real": -2.086247444152832, |
|
"logps/generated": -203.22409057617188, |
|
"logps/oppo_gen": -50.193504333496094, |
|
"logps/oppo_real": -148.25294494628906, |
|
"logps/real": -181.40602111816406, |
|
"loss": 0.7164, |
|
"loss/gen": 0.5076989531517029, |
|
"loss/real": 0.35455718636512756, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -153.0305938720703, |
|
"rewards/margins": 119.87752532958984, |
|
"rewards/real": -33.15306854248047, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"grad_norm": 28.44118708927029, |
|
"learning_rate": 4.943019943019943e-07, |
|
"logits/generated": -1.962064504623413, |
|
"logits/oppo_generated": -2.660369396209717, |
|
"logits/oppo_real": -2.6082496643066406, |
|
"logits/real": -2.004966974258423, |
|
"logps/generated": -199.8478240966797, |
|
"logps/oppo_gen": -55.80210876464844, |
|
"logps/oppo_real": -201.49038696289062, |
|
"logps/real": -236.99850463867188, |
|
"loss": 0.61, |
|
"loss/gen": 0.625639796257019, |
|
"loss/real": 0.3782804012298584, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -144.04571533203125, |
|
"rewards/margins": 108.53761291503906, |
|
"rewards/real": -35.50811767578125, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 34.77088794433499, |
|
"learning_rate": 4.93945868945869e-07, |
|
"logits/generated": -2.1952624320983887, |
|
"logits/oppo_generated": -2.746832847595215, |
|
"logits/oppo_real": -2.973560333251953, |
|
"logits/real": -2.191551685333252, |
|
"logps/generated": -218.537109375, |
|
"logps/oppo_gen": -77.28608703613281, |
|
"logps/oppo_real": -547.3628540039062, |
|
"logps/real": -573.14208984375, |
|
"loss": 0.6871, |
|
"loss/gen": 0.6249206066131592, |
|
"loss/real": 0.30037403106689453, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -141.2510223388672, |
|
"rewards/margins": 115.47174835205078, |
|
"rewards/real": -25.77927017211914, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 29.88689616704953, |
|
"learning_rate": 4.935897435897436e-07, |
|
"logits/generated": -1.6832900047302246, |
|
"logits/oppo_generated": -2.664555072784424, |
|
"logits/oppo_real": -2.6400251388549805, |
|
"logits/real": -2.0845460891723633, |
|
"logps/generated": -307.5957336425781, |
|
"logps/oppo_gen": -78.57785034179688, |
|
"logps/oppo_real": -398.628662109375, |
|
"logps/real": -400.03839111328125, |
|
"loss": 0.6084, |
|
"loss/gen": 0.14119790494441986, |
|
"loss/real": 0.09441090375185013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.01788330078125, |
|
"rewards/margins": 227.60816955566406, |
|
"rewards/real": -1.4097027778625488, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"grad_norm": 26.655489804517757, |
|
"learning_rate": 4.932336182336182e-07, |
|
"logits/generated": -2.235076665878296, |
|
"logits/oppo_generated": -2.638930320739746, |
|
"logits/oppo_real": -3.1015210151672363, |
|
"logits/real": -2.072552442550659, |
|
"logps/generated": -260.4788818359375, |
|
"logps/oppo_gen": -84.6130599975586, |
|
"logps/oppo_real": -310.54534912109375, |
|
"logps/real": -339.3001708984375, |
|
"loss": 0.6508, |
|
"loss/gen": 0.5073614716529846, |
|
"loss/real": 0.32746076583862305, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -175.86582946777344, |
|
"rewards/margins": 147.1110076904297, |
|
"rewards/real": -28.754831314086914, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 51.166719683061565, |
|
"learning_rate": 4.928774928774928e-07, |
|
"logits/generated": -2.112302780151367, |
|
"logits/oppo_generated": -2.9305167198181152, |
|
"logits/oppo_real": -2.7986156940460205, |
|
"logits/real": -2.261821746826172, |
|
"logps/generated": -329.22186279296875, |
|
"logps/oppo_gen": -55.247596740722656, |
|
"logps/oppo_real": -159.6094970703125, |
|
"logps/real": -214.50485229492188, |
|
"loss": 0.5733, |
|
"loss/gen": 0.1604616641998291, |
|
"loss/real": 0.5549860000610352, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.9742431640625, |
|
"rewards/margins": 219.07891845703125, |
|
"rewards/real": -54.895347595214844, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"grad_norm": 54.18406446361213, |
|
"learning_rate": 4.925213675213676e-07, |
|
"logits/generated": -1.972760558128357, |
|
"logits/oppo_generated": -2.733177900314331, |
|
"logits/oppo_real": -3.0261659622192383, |
|
"logits/real": -2.0272536277770996, |
|
"logps/generated": -270.2849426269531, |
|
"logps/oppo_gen": -77.4105453491211, |
|
"logps/oppo_real": -291.50042724609375, |
|
"logps/real": -314.6445617675781, |
|
"loss": 0.6405, |
|
"loss/gen": 0.24113653600215912, |
|
"loss/real": 0.25997018814086914, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -192.8743896484375, |
|
"rewards/margins": 169.73028564453125, |
|
"rewards/real": -23.144100189208984, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 31.552650652260482, |
|
"learning_rate": 4.921652421652421e-07, |
|
"logits/generated": -1.6506314277648926, |
|
"logits/oppo_generated": -2.70068359375, |
|
"logits/oppo_real": -2.622352361679077, |
|
"logits/real": -1.8686270713806152, |
|
"logps/generated": -331.11773681640625, |
|
"logps/oppo_gen": -66.53448486328125, |
|
"logps/oppo_real": -142.07913208007812, |
|
"logps/real": -227.437744140625, |
|
"loss": 0.6586, |
|
"loss/gen": 0.03464512526988983, |
|
"loss/real": 0.853585958480835, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -264.5832824707031, |
|
"rewards/margins": 179.22467041015625, |
|
"rewards/real": -85.35860443115234, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"grad_norm": 27.14020221317216, |
|
"learning_rate": 4.918091168091168e-07, |
|
"logits/generated": -2.1259684562683105, |
|
"logits/oppo_generated": -3.0608558654785156, |
|
"logits/oppo_real": -3.0881457328796387, |
|
"logits/real": -2.3352560997009277, |
|
"logps/generated": -275.365478515625, |
|
"logps/oppo_gen": -78.30126953125, |
|
"logps/oppo_real": -296.7585144042969, |
|
"logps/real": -317.5572509765625, |
|
"loss": 0.567, |
|
"loss/gen": 0.22492240369319916, |
|
"loss/real": 0.27364999055862427, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -197.064208984375, |
|
"rewards/margins": 176.26547241210938, |
|
"rewards/real": -20.798734664916992, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 37.116987705997744, |
|
"learning_rate": 4.914529914529914e-07, |
|
"logits/generated": -2.0405826568603516, |
|
"logits/oppo_generated": -2.904336929321289, |
|
"logits/oppo_real": -3.0007967948913574, |
|
"logits/real": -2.274019956588745, |
|
"logps/generated": -340.4947509765625, |
|
"logps/oppo_gen": -78.76142883300781, |
|
"logps/oppo_real": -321.17315673828125, |
|
"logps/real": -368.8492431640625, |
|
"loss": 0.5723, |
|
"loss/gen": 0.07833881676197052, |
|
"loss/real": 0.5737317800521851, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -261.7333068847656, |
|
"rewards/margins": 214.0572509765625, |
|
"rewards/real": -47.676055908203125, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 28.903528165773942, |
|
"learning_rate": 4.910968660968661e-07, |
|
"logits/generated": -2.1163697242736816, |
|
"logits/oppo_generated": -3.0246148109436035, |
|
"logits/oppo_real": -3.155604839324951, |
|
"logits/real": -2.3015458583831787, |
|
"logps/generated": -325.52911376953125, |
|
"logps/oppo_gen": -99.78816986083984, |
|
"logps/oppo_real": -357.6624755859375, |
|
"logps/real": -369.4306640625, |
|
"loss": 0.4923, |
|
"loss/gen": 0.08446104824542999, |
|
"loss/real": 0.1771288365125656, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -225.74095153808594, |
|
"rewards/margins": 213.97276306152344, |
|
"rewards/real": -11.768176078796387, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"grad_norm": 42.85362560553147, |
|
"learning_rate": 4.907407407407407e-07, |
|
"logits/generated": -1.962327480316162, |
|
"logits/oppo_generated": -2.718918800354004, |
|
"logits/oppo_real": -2.8950438499450684, |
|
"logits/real": -2.042083740234375, |
|
"logps/generated": -230.06312561035156, |
|
"logps/oppo_gen": -73.73533630371094, |
|
"logps/oppo_real": -276.2977294921875, |
|
"logps/real": -287.68817138671875, |
|
"loss": 0.4966, |
|
"loss/gen": 0.5271965861320496, |
|
"loss/real": 0.24724090099334717, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -156.3278045654297, |
|
"rewards/margins": 144.93739318847656, |
|
"rewards/real": -11.390399932861328, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 60.4540336582008, |
|
"learning_rate": 4.903846153846153e-07, |
|
"logits/generated": -2.031083822250366, |
|
"logits/oppo_generated": -2.7741386890411377, |
|
"logits/oppo_real": -2.8905487060546875, |
|
"logits/real": -2.146413803100586, |
|
"logps/generated": -242.85926818847656, |
|
"logps/oppo_gen": -70.42605590820312, |
|
"logps/oppo_real": -291.8798522949219, |
|
"logps/real": -347.0901794433594, |
|
"loss": 0.5204, |
|
"loss/gen": 0.3330785036087036, |
|
"loss/real": 0.5649424195289612, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -172.43319702148438, |
|
"rewards/margins": 117.2228775024414, |
|
"rewards/real": -55.21034240722656, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"grad_norm": 60.4540336582008, |
|
"learning_rate": 4.903846153846153e-07, |
|
"logits/generated": -1.8672581911087036, |
|
"logits/oppo_generated": -2.731257438659668, |
|
"logits/oppo_real": -2.804780960083008, |
|
"logits/real": -2.0895280838012695, |
|
"logps/generated": -339.7601013183594, |
|
"logps/oppo_gen": -143.67832946777344, |
|
"logps/oppo_real": -309.55450439453125, |
|
"logps/real": -320.901123046875, |
|
"loss": 0.3716, |
|
"loss/gen": 0.2359689623117447, |
|
"loss/real": 0.16374921798706055, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -196.081787109375, |
|
"rewards/margins": 184.73516845703125, |
|
"rewards/real": -11.34660816192627, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 36.390479081394865, |
|
"learning_rate": 4.9002849002849e-07, |
|
"logits/generated": -1.936166524887085, |
|
"logits/oppo_generated": -2.710496664047241, |
|
"logits/oppo_real": -2.980191707611084, |
|
"logits/real": -1.961893081665039, |
|
"logps/generated": -314.8875732421875, |
|
"logps/oppo_gen": -71.51214599609375, |
|
"logps/oppo_real": -284.34765625, |
|
"logps/real": -309.43060302734375, |
|
"loss": 0.4212, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.35151734948158264, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -243.37542724609375, |
|
"rewards/margins": 218.29248046875, |
|
"rewards/real": -25.082937240600586, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 68.84343437853207, |
|
"learning_rate": 4.896723646723647e-07, |
|
"logits/generated": -2.055253267288208, |
|
"logits/oppo_generated": -3.0934062004089355, |
|
"logits/oppo_real": -3.077010154724121, |
|
"logits/real": -2.3065128326416016, |
|
"logps/generated": -340.2142333984375, |
|
"logps/oppo_gen": -109.1805419921875, |
|
"logps/oppo_real": -348.23834228515625, |
|
"logps/real": -346.0427551269531, |
|
"loss": 0.5169, |
|
"loss/gen": 0.03457939624786377, |
|
"loss/real": 0.09117183089256287, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.03366088867188, |
|
"rewards/margins": 233.22927856445312, |
|
"rewards/real": 2.195611000061035, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"grad_norm": 34.09506041248864, |
|
"learning_rate": 4.893162393162393e-07, |
|
"logits/generated": -1.9534183740615845, |
|
"logits/oppo_generated": -2.838265895843506, |
|
"logits/oppo_real": -3.01387357711792, |
|
"logits/real": -2.075807809829712, |
|
"logps/generated": -265.85076904296875, |
|
"logps/oppo_gen": -75.5096206665039, |
|
"logps/oppo_real": -242.11915588378906, |
|
"logps/real": -281.80438232421875, |
|
"loss": 0.5159, |
|
"loss/gen": 0.18575912714004517, |
|
"loss/real": 0.5187460780143738, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -190.34115600585938, |
|
"rewards/margins": 150.65594482421875, |
|
"rewards/real": -39.68519973754883, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 34.22783840494459, |
|
"learning_rate": 4.889601139601139e-07, |
|
"logits/generated": -1.8958648443222046, |
|
"logits/oppo_generated": -2.786154270172119, |
|
"logits/oppo_real": -2.980445146560669, |
|
"logits/real": -1.9751079082489014, |
|
"logps/generated": -315.6554870605469, |
|
"logps/oppo_gen": -78.40753173828125, |
|
"logps/oppo_real": -188.29739379882812, |
|
"logps/real": -222.51780700683594, |
|
"loss": 0.4878, |
|
"loss/gen": 0.023022428154945374, |
|
"loss/real": 0.41188403964042664, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -237.24795532226562, |
|
"rewards/margins": 203.02752685546875, |
|
"rewards/real": -34.220428466796875, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"grad_norm": 35.71409671406464, |
|
"learning_rate": 4.886039886039886e-07, |
|
"logits/generated": -1.8381710052490234, |
|
"logits/oppo_generated": -2.8353500366210938, |
|
"logits/oppo_real": -2.788581371307373, |
|
"logits/real": -1.96791410446167, |
|
"logps/generated": -293.2041015625, |
|
"logps/oppo_gen": -74.27359008789062, |
|
"logps/oppo_real": -262.4258728027344, |
|
"logps/real": -312.2054443359375, |
|
"loss": 0.458, |
|
"loss/gen": 0.1743556559085846, |
|
"loss/real": 0.5692518949508667, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -218.93051147460938, |
|
"rewards/margins": 169.15093994140625, |
|
"rewards/real": -49.77956771850586, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 47.1891271449592, |
|
"learning_rate": 4.882478632478633e-07, |
|
"logits/generated": -1.840759515762329, |
|
"logits/oppo_generated": -2.8188014030456543, |
|
"logits/oppo_real": -2.757133960723877, |
|
"logits/real": -1.9096312522888184, |
|
"logps/generated": -285.27850341796875, |
|
"logps/oppo_gen": -55.317054748535156, |
|
"logps/oppo_real": -178.10824584960938, |
|
"logps/real": -215.8996124267578, |
|
"loss": 0.4616, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.41009002923965454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.96144104003906, |
|
"rewards/margins": 192.17007446289062, |
|
"rewards/real": -37.79136276245117, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"grad_norm": 38.070165804919505, |
|
"learning_rate": 4.878917378917379e-07, |
|
"logits/generated": -1.9046831130981445, |
|
"logits/oppo_generated": -2.865746259689331, |
|
"logits/oppo_real": -2.85042142868042, |
|
"logits/real": -2.0951907634735107, |
|
"logps/generated": -316.7037658691406, |
|
"logps/oppo_gen": -101.81581115722656, |
|
"logps/oppo_real": -463.47314453125, |
|
"logps/real": -466.7184143066406, |
|
"loss": 0.4761, |
|
"loss/gen": 0.05637046694755554, |
|
"loss/real": 0.1776004135608673, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -214.88796997070312, |
|
"rewards/margins": 211.6427001953125, |
|
"rewards/real": -3.2452640533447266, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 36.46849560751033, |
|
"learning_rate": 4.875356125356125e-07, |
|
"logits/generated": -1.9553462266921997, |
|
"logits/oppo_generated": -2.9923882484436035, |
|
"logits/oppo_real": -2.813816547393799, |
|
"logits/real": -2.1159887313842773, |
|
"logps/generated": -345.8742980957031, |
|
"logps/oppo_gen": -78.51251220703125, |
|
"logps/oppo_real": -286.4658508300781, |
|
"logps/real": -274.63427734375, |
|
"loss": 0.4137, |
|
"loss/gen": 0.007427394390106201, |
|
"loss/real": 0.03321278840303421, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -267.3617858886719, |
|
"rewards/margins": 279.1933898925781, |
|
"rewards/real": 11.83156967163086, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 31.978947389991983, |
|
"learning_rate": 4.871794871794871e-07, |
|
"logits/generated": -2.1326825618743896, |
|
"logits/oppo_generated": -2.7725887298583984, |
|
"logits/oppo_real": -3.063380002975464, |
|
"logits/real": -2.051950216293335, |
|
"logps/generated": -261.63037109375, |
|
"logps/oppo_gen": -79.40229034423828, |
|
"logps/oppo_real": -383.419677734375, |
|
"logps/real": -401.0813293457031, |
|
"loss": 0.4048, |
|
"loss/gen": 0.24421586096286774, |
|
"loss/real": 0.2781957685947418, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -182.2280731201172, |
|
"rewards/margins": 164.56642150878906, |
|
"rewards/real": -17.661649703979492, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"grad_norm": 43.62784005350683, |
|
"learning_rate": 4.868233618233618e-07, |
|
"logits/generated": -2.1139259338378906, |
|
"logits/oppo_generated": -2.8321666717529297, |
|
"logits/oppo_real": -3.1668171882629395, |
|
"logits/real": -2.0823147296905518, |
|
"logps/generated": -363.62158203125, |
|
"logps/oppo_gen": -99.83964538574219, |
|
"logps/oppo_real": -322.6613464355469, |
|
"logps/real": -310.8186950683594, |
|
"loss": 0.3929, |
|
"loss/gen": 0.28169000148773193, |
|
"loss/real": 0.1750582456588745, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -263.7819519042969, |
|
"rewards/margins": 275.6246337890625, |
|
"rewards/real": 11.842658042907715, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 32.57134409346178, |
|
"learning_rate": 4.864672364672365e-07, |
|
"logits/generated": -2.2618470191955566, |
|
"logits/oppo_generated": -3.000812530517578, |
|
"logits/oppo_real": -3.1619484424591064, |
|
"logits/real": -2.332085609436035, |
|
"logps/generated": -293.44439697265625, |
|
"logps/oppo_gen": -83.82888793945312, |
|
"logps/oppo_real": -441.3746337890625, |
|
"logps/real": -436.41595458984375, |
|
"loss": 0.4213, |
|
"loss/gen": 0.061429619789123535, |
|
"loss/real": 0.08119938522577286, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -209.61550903320312, |
|
"rewards/margins": 214.57421875, |
|
"rewards/real": 4.958704948425293, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"grad_norm": 56.40902867117107, |
|
"learning_rate": 4.861111111111111e-07, |
|
"logits/generated": -1.467146635055542, |
|
"logits/oppo_generated": -2.4111037254333496, |
|
"logits/oppo_real": -2.622360944747925, |
|
"logits/real": -1.5239063501358032, |
|
"logps/generated": -283.9698791503906, |
|
"logps/oppo_gen": -94.29784393310547, |
|
"logps/oppo_real": -307.8828125, |
|
"logps/real": -318.51324462890625, |
|
"loss": 0.4075, |
|
"loss/gen": 0.1725415140390396, |
|
"loss/real": 0.24915428459644318, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -189.67201232910156, |
|
"rewards/margins": 179.04159545898438, |
|
"rewards/real": -10.630415916442871, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 57.75006764065303, |
|
"learning_rate": 4.857549857549857e-07, |
|
"logits/generated": -1.9342730045318604, |
|
"logits/oppo_generated": -2.7816574573516846, |
|
"logits/oppo_real": -2.923349380493164, |
|
"logits/real": -2.000138759613037, |
|
"logps/generated": -264.1120300292969, |
|
"logps/oppo_gen": -70.22672271728516, |
|
"logps/oppo_real": -286.0644836425781, |
|
"logps/real": -369.57379150390625, |
|
"loss": 0.414, |
|
"loss/gen": 0.20479409396648407, |
|
"loss/real": 0.8575762510299683, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -193.8852996826172, |
|
"rewards/margins": 110.37598419189453, |
|
"rewards/real": -83.50932312011719, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 115.49377688862721, |
|
"learning_rate": 4.853988603988603e-07, |
|
"logits/generated": -1.7300872802734375, |
|
"logits/oppo_generated": -2.624129056930542, |
|
"logits/oppo_real": -2.6314826011657715, |
|
"logits/real": -1.706296682357788, |
|
"logps/generated": -254.1842498779297, |
|
"logps/oppo_gen": -48.185340881347656, |
|
"logps/oppo_real": -148.66656494140625, |
|
"logps/real": -187.1585693359375, |
|
"loss": 0.3277, |
|
"loss/gen": 0.08585792779922485, |
|
"loss/real": 0.4419286251068115, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -205.9989013671875, |
|
"rewards/margins": 167.50689697265625, |
|
"rewards/real": -38.492000579833984, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"grad_norm": 63.99712937592885, |
|
"learning_rate": 4.850427350427351e-07, |
|
"logits/generated": -1.8802506923675537, |
|
"logits/oppo_generated": -2.668670177459717, |
|
"logits/oppo_real": -2.9500231742858887, |
|
"logits/real": -1.7524856328964233, |
|
"logps/generated": -292.76727294921875, |
|
"logps/oppo_gen": -76.79248809814453, |
|
"logps/oppo_real": -287.1414794921875, |
|
"logps/real": -329.4079284667969, |
|
"loss": 0.34, |
|
"loss/gen": 0.22407092154026031, |
|
"loss/real": 0.4708248972892761, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -215.97479248046875, |
|
"rewards/margins": 173.7083740234375, |
|
"rewards/real": -42.266422271728516, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 158.62792427367572, |
|
"learning_rate": 4.846866096866097e-07, |
|
"logits/generated": -2.008450984954834, |
|
"logits/oppo_generated": -2.8624868392944336, |
|
"logits/oppo_real": -3.0077338218688965, |
|
"logits/real": -1.9637665748596191, |
|
"logps/generated": -250.85487365722656, |
|
"logps/oppo_gen": -103.01863861083984, |
|
"logps/oppo_real": -484.10565185546875, |
|
"logps/real": -474.10980224609375, |
|
"loss": 0.554, |
|
"loss/gen": 0.6981667280197144, |
|
"loss/real": 0.05877792090177536, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -147.83624267578125, |
|
"rewards/margins": 157.83209228515625, |
|
"rewards/real": 9.995855331420898, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"grad_norm": 94.57272850331907, |
|
"learning_rate": 4.843304843304843e-07, |
|
"logits/generated": -1.8665781021118164, |
|
"logits/oppo_generated": -2.976921796798706, |
|
"logits/oppo_real": -3.0094780921936035, |
|
"logits/real": -1.9989147186279297, |
|
"logps/generated": -280.32012939453125, |
|
"logps/oppo_gen": -66.51390075683594, |
|
"logps/oppo_real": -174.39071655273438, |
|
"logps/real": -190.4137420654297, |
|
"loss": 0.5, |
|
"loss/gen": 0.09350240230560303, |
|
"loss/real": 0.278587281703949, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -213.8062286376953, |
|
"rewards/margins": 197.783203125, |
|
"rewards/real": -16.02302360534668, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 46.788289592255346, |
|
"learning_rate": 4.839743589743589e-07, |
|
"logits/generated": -1.8179612159729004, |
|
"logits/oppo_generated": -3.01529598236084, |
|
"logits/oppo_real": -2.9185380935668945, |
|
"logits/real": -2.0011558532714844, |
|
"logps/generated": -466.2247619628906, |
|
"logps/oppo_gen": -86.220458984375, |
|
"logps/oppo_real": -329.8023376464844, |
|
"logps/real": -332.1396179199219, |
|
"loss": 0.4253, |
|
"loss/gen": 0.05652913451194763, |
|
"loss/real": 0.2249125838279724, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -380.00433349609375, |
|
"rewards/margins": 377.66705322265625, |
|
"rewards/real": -2.3372955322265625, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"grad_norm": 45.30856125122767, |
|
"learning_rate": 4.836182336182337e-07, |
|
"logits/generated": -1.641934871673584, |
|
"logits/oppo_generated": -2.864108085632324, |
|
"logits/oppo_real": -2.8596436977386475, |
|
"logits/real": -1.7771339416503906, |
|
"logps/generated": -308.81744384765625, |
|
"logps/oppo_gen": -79.35113525390625, |
|
"logps/oppo_real": -357.43438720703125, |
|
"logps/real": -351.2286376953125, |
|
"loss": 0.4106, |
|
"loss/gen": 0.02170167863368988, |
|
"loss/real": 0.11313143372535706, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.46632385253906, |
|
"rewards/margins": 235.67205810546875, |
|
"rewards/real": 6.2057342529296875, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 60.23657786179424, |
|
"learning_rate": 4.832621082621082e-07, |
|
"logits/generated": -1.7438234090805054, |
|
"logits/oppo_generated": -2.635812282562256, |
|
"logits/oppo_real": -2.784547805786133, |
|
"logits/real": -1.6820318698883057, |
|
"logps/generated": -362.5193786621094, |
|
"logps/oppo_gen": -87.48421478271484, |
|
"logps/oppo_real": -250.10626220703125, |
|
"logps/real": -273.316162109375, |
|
"loss": 0.3701, |
|
"loss/gen": 0.0328507125377655, |
|
"loss/real": 0.2887704372406006, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -275.03515625, |
|
"rewards/margins": 251.82522583007812, |
|
"rewards/real": -23.209918975830078, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 78.00425982458425, |
|
"learning_rate": 4.829059829059829e-07, |
|
"logits/generated": -1.6737346649169922, |
|
"logits/oppo_generated": -2.9845218658447266, |
|
"logits/oppo_real": -3.016307830810547, |
|
"logits/real": -2.0023183822631836, |
|
"logps/generated": -242.6670379638672, |
|
"logps/oppo_gen": -55.523197174072266, |
|
"logps/oppo_real": -291.81378173828125, |
|
"logps/real": -357.2690734863281, |
|
"loss": 0.4034, |
|
"loss/gen": 0.28107643127441406, |
|
"loss/real": 0.7271257638931274, |
|
"rewards/accuracies": 0.75, |
|
"rewards/generated": -187.14382934570312, |
|
"rewards/margins": 121.68854522705078, |
|
"rewards/real": -65.45529174804688, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"grad_norm": 36.09174608885233, |
|
"learning_rate": 4.825498575498575e-07, |
|
"logits/generated": -1.8338725566864014, |
|
"logits/oppo_generated": -2.8317785263061523, |
|
"logits/oppo_real": -2.849785327911377, |
|
"logits/real": -1.9712032079696655, |
|
"logps/generated": -256.17181396484375, |
|
"logps/oppo_gen": -65.48351287841797, |
|
"logps/oppo_real": -259.8980712890625, |
|
"logps/real": -279.2626953125, |
|
"loss": 0.3141, |
|
"loss/gen": 0.18348117172718048, |
|
"loss/real": 0.2724674642086029, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -190.68829345703125, |
|
"rewards/margins": 171.32366943359375, |
|
"rewards/real": -19.364639282226562, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 43.09703010540576, |
|
"learning_rate": 4.821937321937321e-07, |
|
"logits/generated": -1.9401007890701294, |
|
"logits/oppo_generated": -2.9616637229919434, |
|
"logits/oppo_real": -2.8549320697784424, |
|
"logits/real": -2.2171854972839355, |
|
"logps/generated": -271.0581359863281, |
|
"logps/oppo_gen": -66.1073226928711, |
|
"logps/oppo_real": -297.0393981933594, |
|
"logps/real": -283.0654602050781, |
|
"loss": 0.3459, |
|
"loss/gen": 0.11610506474971771, |
|
"loss/real": 0.01628967374563217, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -204.95083618164062, |
|
"rewards/margins": 218.9247589111328, |
|
"rewards/real": 13.973949432373047, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"grad_norm": 39.01662618859587, |
|
"learning_rate": 4.818376068376069e-07, |
|
"logits/generated": -2.057642936706543, |
|
"logits/oppo_generated": -2.944060802459717, |
|
"logits/oppo_real": -2.977362632751465, |
|
"logits/real": -2.0833921432495117, |
|
"logps/generated": -284.1174621582031, |
|
"logps/oppo_gen": -49.032493591308594, |
|
"logps/oppo_real": -197.13412475585938, |
|
"logps/real": -235.72854614257812, |
|
"loss": 0.3748, |
|
"loss/gen": 0.1771981120109558, |
|
"loss/real": 0.44538283348083496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -235.0849609375, |
|
"rewards/margins": 196.49057006835938, |
|
"rewards/real": -38.594398498535156, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 76.44660552368398, |
|
"learning_rate": 4.814814814814814e-07, |
|
"logits/generated": -2.012396812438965, |
|
"logits/oppo_generated": -2.9935152530670166, |
|
"logits/oppo_real": -2.782620906829834, |
|
"logits/real": -2.228747844696045, |
|
"logps/generated": -225.5810546875, |
|
"logps/oppo_gen": -79.41259002685547, |
|
"logps/oppo_real": -304.58465576171875, |
|
"logps/real": -294.3427734375, |
|
"loss": 0.4461, |
|
"loss/gen": 0.6170589327812195, |
|
"loss/real": 0.013846360146999359, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -146.16847229003906, |
|
"rewards/margins": 156.41033935546875, |
|
"rewards/real": 10.241872787475586, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 44.887227127320834, |
|
"learning_rate": 4.811253561253561e-07, |
|
"logits/generated": -1.8049852848052979, |
|
"logits/oppo_generated": -3.0348973274230957, |
|
"logits/oppo_real": -2.8550362586975098, |
|
"logits/real": -2.1674275398254395, |
|
"logps/generated": -334.69403076171875, |
|
"logps/oppo_gen": -147.11734008789062, |
|
"logps/oppo_real": -324.0049743652344, |
|
"logps/real": -330.11480712890625, |
|
"loss": 0.3668, |
|
"loss/gen": 0.32383447885513306, |
|
"loss/real": 0.19668863713741302, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -187.57669067382812, |
|
"rewards/margins": 181.46685791015625, |
|
"rewards/real": -6.109820365905762, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"grad_norm": 33.3576313215254, |
|
"learning_rate": 4.807692307692307e-07, |
|
"logits/generated": -1.867674469947815, |
|
"logits/oppo_generated": -2.8708338737487793, |
|
"logits/oppo_real": -2.8143606185913086, |
|
"logits/real": -2.028439998626709, |
|
"logps/generated": -307.528564453125, |
|
"logps/oppo_gen": -81.77798461914062, |
|
"logps/oppo_real": -330.5220031738281, |
|
"logps/real": -330.1318054199219, |
|
"loss": 0.355, |
|
"loss/gen": 0.2485622763633728, |
|
"loss/real": 0.1294267177581787, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -225.75057983398438, |
|
"rewards/margins": 226.1407470703125, |
|
"rewards/real": 0.3901691436767578, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 48.87256477708762, |
|
"learning_rate": 4.804131054131054e-07, |
|
"logits/generated": -1.6193779706954956, |
|
"logits/oppo_generated": -2.7298922538757324, |
|
"logits/oppo_real": -2.698655605316162, |
|
"logits/real": -1.7858012914657593, |
|
"logps/generated": -299.572998046875, |
|
"logps/oppo_gen": -74.60616302490234, |
|
"logps/oppo_real": -251.41427612304688, |
|
"logps/real": -266.9776306152344, |
|
"loss": 0.3602, |
|
"loss/gen": 0.04555131494998932, |
|
"loss/real": 0.34169435501098633, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -224.9668426513672, |
|
"rewards/margins": 209.40345764160156, |
|
"rewards/real": -15.563373565673828, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"grad_norm": 63.47711970909282, |
|
"learning_rate": 4.8005698005698e-07, |
|
"logits/generated": -1.8660156726837158, |
|
"logits/oppo_generated": -2.9584808349609375, |
|
"logits/oppo_real": -2.8358330726623535, |
|
"logits/real": -2.125810384750366, |
|
"logps/generated": -329.8253479003906, |
|
"logps/oppo_gen": -83.23335266113281, |
|
"logps/oppo_real": -311.66064453125, |
|
"logps/real": -308.5706787109375, |
|
"loss": 0.3475, |
|
"loss/gen": 0.030205443501472473, |
|
"loss/real": 0.12542490661144257, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -246.59197998046875, |
|
"rewards/margins": 249.68197631835938, |
|
"rewards/real": 3.089975357055664, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 41.84529030025362, |
|
"learning_rate": 4.797008547008547e-07, |
|
"logits/generated": -1.794067621231079, |
|
"logits/oppo_generated": -2.83894681930542, |
|
"logits/oppo_real": -2.731696605682373, |
|
"logits/real": -1.9485492706298828, |
|
"logps/generated": -330.9621887207031, |
|
"logps/oppo_gen": -103.72628021240234, |
|
"logps/oppo_real": -218.9561767578125, |
|
"logps/real": -219.8974609375, |
|
"loss": 0.307, |
|
"loss/gen": 0.06028883159160614, |
|
"loss/real": 0.14670495688915253, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -227.23593139648438, |
|
"rewards/margins": 226.29464721679688, |
|
"rewards/real": -0.9412956237792969, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"grad_norm": 32.90370323418684, |
|
"learning_rate": 4.793447293447293e-07, |
|
"logits/generated": -1.8011112213134766, |
|
"logits/oppo_generated": -2.7633142471313477, |
|
"logits/oppo_real": -2.9560418128967285, |
|
"logits/real": -1.7214103937149048, |
|
"logps/generated": -348.088134765625, |
|
"logps/oppo_gen": -74.91079711914062, |
|
"logps/oppo_real": -299.2713623046875, |
|
"logps/real": -282.55438232421875, |
|
"loss": 0.3138, |
|
"loss/gen": 0.033689409494400024, |
|
"loss/real": 0.08471229672431946, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.1773376464844, |
|
"rewards/margins": 289.8943176269531, |
|
"rewards/real": 16.716989517211914, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 63.98752090862131, |
|
"learning_rate": 4.78988603988604e-07, |
|
"logits/generated": -1.8813642263412476, |
|
"logits/oppo_generated": -2.8308515548706055, |
|
"logits/oppo_real": -3.085522174835205, |
|
"logits/real": -1.916734218597412, |
|
"logps/generated": -354.05621337890625, |
|
"logps/oppo_gen": -134.01483154296875, |
|
"logps/oppo_real": -442.37945556640625, |
|
"logps/real": -426.1463928222656, |
|
"loss": 0.3186, |
|
"loss/gen": 0.08905501663684845, |
|
"loss/real": 0.009031563997268677, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -220.04135131835938, |
|
"rewards/margins": 236.27442932128906, |
|
"rewards/real": 16.233068466186523, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 28.016905994138163, |
|
"learning_rate": 4.786324786324786e-07, |
|
"logits/generated": -1.7203209400177002, |
|
"logits/oppo_generated": -2.8044867515563965, |
|
"logits/oppo_real": -2.8060150146484375, |
|
"logits/real": -1.783670425415039, |
|
"logps/generated": -276.6085205078125, |
|
"logps/oppo_gen": -51.423309326171875, |
|
"logps/oppo_real": -222.54879760742188, |
|
"logps/real": -233.36328125, |
|
"loss": 0.2702, |
|
"loss/gen": 0.014314472675323486, |
|
"loss/real": 0.26025110483169556, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -225.1852264404297, |
|
"rewards/margins": 214.37075805664062, |
|
"rewards/real": -10.814473152160645, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"grad_norm": 42.268631478097916, |
|
"learning_rate": 4.782763532763532e-07, |
|
"logits/generated": -1.8753924369812012, |
|
"logits/oppo_generated": -2.932793140411377, |
|
"logits/oppo_real": -2.9959638118743896, |
|
"logits/real": -2.070061683654785, |
|
"logps/generated": -297.700927734375, |
|
"logps/oppo_gen": -68.20332336425781, |
|
"logps/oppo_real": -376.541015625, |
|
"logps/real": -386.68438720703125, |
|
"loss": 0.2846, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.2723137140274048, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.4976043701172, |
|
"rewards/margins": 219.35421752929688, |
|
"rewards/real": -10.143372535705566, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 62.84870885768557, |
|
"learning_rate": 4.779202279202279e-07, |
|
"logits/generated": -1.745549201965332, |
|
"logits/oppo_generated": -2.780601739883423, |
|
"logits/oppo_real": -2.8726038932800293, |
|
"logits/real": -1.851616621017456, |
|
"logps/generated": -308.7665100097656, |
|
"logps/oppo_gen": -75.83106994628906, |
|
"logps/oppo_real": -327.609619140625, |
|
"logps/real": -323.2357177734375, |
|
"loss": 0.3766, |
|
"loss/gen": 0.17890962958335876, |
|
"loss/real": 0.07637953758239746, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -232.93545532226562, |
|
"rewards/margins": 237.3093719482422, |
|
"rewards/real": 4.373929977416992, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 80.61028208687152, |
|
"learning_rate": 4.775641025641026e-07, |
|
"logits/generated": -1.832048773765564, |
|
"logits/oppo_generated": -2.91953706741333, |
|
"logits/oppo_real": -2.820370674133301, |
|
"logits/real": -2.110717535018921, |
|
"logps/generated": -299.78704833984375, |
|
"logps/oppo_gen": -75.91517639160156, |
|
"logps/oppo_real": -531.0400390625, |
|
"logps/real": -533.7149047851562, |
|
"loss": 0.3108, |
|
"loss/gen": 0.07047438621520996, |
|
"loss/real": 0.11644826829433441, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -223.87188720703125, |
|
"rewards/margins": 221.1970672607422, |
|
"rewards/real": -2.6748085021972656, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 21.048619255395202, |
|
"learning_rate": 4.772079772079772e-07, |
|
"logits/generated": -1.83795964717865, |
|
"logits/oppo_generated": -2.927794933319092, |
|
"logits/oppo_real": -2.8259315490722656, |
|
"logits/real": -2.103085517883301, |
|
"logps/generated": -293.03070068359375, |
|
"logps/oppo_gen": -75.32722473144531, |
|
"logps/oppo_real": -334.3116149902344, |
|
"logps/real": -329.84771728515625, |
|
"loss": 0.2749, |
|
"loss/gen": 0.1955680400133133, |
|
"loss/real": 0.08636181056499481, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -217.70347595214844, |
|
"rewards/margins": 222.16734313964844, |
|
"rewards/real": 4.463872909545898, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"grad_norm": 27.770455184464364, |
|
"learning_rate": 4.768518518518518e-07, |
|
"logits/generated": -1.8320189714431763, |
|
"logits/oppo_generated": -2.798323154449463, |
|
"logits/oppo_real": -3.0827927589416504, |
|
"logits/real": -2.0010604858398438, |
|
"logps/generated": -309.42236328125, |
|
"logps/oppo_gen": -85.98326110839844, |
|
"logps/oppo_real": -484.7052001953125, |
|
"logps/real": -477.567626953125, |
|
"loss": 0.2701, |
|
"loss/gen": 0.009465828537940979, |
|
"loss/real": 0.04307375103235245, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -223.43910217285156, |
|
"rewards/margins": 230.57669067382812, |
|
"rewards/real": 7.137579441070557, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 35.29567146022477, |
|
"learning_rate": 4.764957264957264e-07, |
|
"logits/generated": -1.6535446643829346, |
|
"logits/oppo_generated": -2.820817232131958, |
|
"logits/oppo_real": -2.7580766677856445, |
|
"logits/real": -1.9198226928710938, |
|
"logps/generated": -388.9769287109375, |
|
"logps/oppo_gen": -98.39456176757812, |
|
"logps/oppo_real": -435.86871337890625, |
|
"logps/real": -446.45001220703125, |
|
"loss": 0.3203, |
|
"loss/gen": 0.04320457577705383, |
|
"loss/real": 0.25816553831100464, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -290.58233642578125, |
|
"rewards/margins": 280.00103759765625, |
|
"rewards/real": -10.581292152404785, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 23.55976183940932, |
|
"learning_rate": 4.761396011396011e-07, |
|
"logits/generated": -1.8841466903686523, |
|
"logits/oppo_generated": -2.991581439971924, |
|
"logits/oppo_real": -3.002182960510254, |
|
"logits/real": -2.131711483001709, |
|
"logps/generated": -322.656494140625, |
|
"logps/oppo_gen": -81.12940216064453, |
|
"logps/oppo_real": -296.61138916015625, |
|
"logps/real": -295.33599853515625, |
|
"loss": 0.2212, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.21376575529575348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -241.52708435058594, |
|
"rewards/margins": 242.80247497558594, |
|
"rewards/real": 1.2753915786743164, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"grad_norm": 23.497506516649803, |
|
"learning_rate": 4.7578347578347577e-07, |
|
"logits/generated": -1.910527229309082, |
|
"logits/oppo_generated": -2.8433456420898438, |
|
"logits/oppo_real": -3.012195110321045, |
|
"logits/real": -1.9440618753433228, |
|
"logps/generated": -244.58482360839844, |
|
"logps/oppo_gen": -63.396881103515625, |
|
"logps/oppo_real": -288.55780029296875, |
|
"logps/real": -273.79388427734375, |
|
"loss": 0.1928, |
|
"loss/gen": 0.48318159580230713, |
|
"loss/real": 0.007603831589221954, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -181.18792724609375, |
|
"rewards/margins": 195.951904296875, |
|
"rewards/real": 14.763958930969238, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 60.16537065598345, |
|
"learning_rate": 4.754273504273504e-07, |
|
"logits/generated": -1.7519464492797852, |
|
"logits/oppo_generated": -2.75607967376709, |
|
"logits/oppo_real": -3.044626235961914, |
|
"logits/real": -1.8694126605987549, |
|
"logps/generated": -350.42431640625, |
|
"logps/oppo_gen": -89.79308319091797, |
|
"logps/oppo_real": -237.51071166992188, |
|
"logps/real": -241.78152465820312, |
|
"loss": 0.2889, |
|
"loss/gen": 0.0006802082061767578, |
|
"loss/real": 0.14765188097953796, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.6312561035156, |
|
"rewards/margins": 256.36041259765625, |
|
"rewards/real": -4.270831108093262, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"grad_norm": 60.16537065598345, |
|
"learning_rate": 4.754273504273504e-07, |
|
"logits/generated": -2.0505292415618896, |
|
"logits/oppo_generated": -2.9334537982940674, |
|
"logits/oppo_real": -3.0197911262512207, |
|
"logits/real": -2.080873966217041, |
|
"logps/generated": -322.20416259765625, |
|
"logps/oppo_gen": -86.25882720947266, |
|
"logps/oppo_real": -171.73361206054688, |
|
"logps/real": -155.16622924804688, |
|
"loss": 0.2352, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.036911122500896454, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -235.94532775878906, |
|
"rewards/margins": 252.5127410888672, |
|
"rewards/real": 16.567398071289062, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 61.99013711558779, |
|
"learning_rate": 4.7507122507122507e-07, |
|
"logits/generated": -1.7211092710494995, |
|
"logits/oppo_generated": -2.8885016441345215, |
|
"logits/oppo_real": -2.9670629501342773, |
|
"logits/real": -2.023648977279663, |
|
"logps/generated": -242.21588134765625, |
|
"logps/oppo_gen": -52.36747741699219, |
|
"logps/oppo_real": -234.88699340820312, |
|
"logps/real": -226.70179748535156, |
|
"loss": 0.2528, |
|
"loss/gen": 0.2596120834350586, |
|
"loss/real": 0.1372058391571045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -189.84840393066406, |
|
"rewards/margins": 198.03363037109375, |
|
"rewards/real": 8.185225486755371, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 52.08842110566656, |
|
"learning_rate": 4.747150997150997e-07, |
|
"logits/generated": -1.8464903831481934, |
|
"logits/oppo_generated": -2.902094841003418, |
|
"logits/oppo_real": -2.738150119781494, |
|
"logits/real": -2.1050400733947754, |
|
"logps/generated": -330.8840026855469, |
|
"logps/oppo_gen": -71.77503967285156, |
|
"logps/oppo_real": -226.59805297851562, |
|
"logps/real": -215.78268432617188, |
|
"loss": 0.2926, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.001055300235748291, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -259.1089782714844, |
|
"rewards/margins": 269.92437744140625, |
|
"rewards/real": 10.815394401550293, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"grad_norm": 48.25866937579066, |
|
"learning_rate": 4.743589743589743e-07, |
|
"logits/generated": -1.7407563924789429, |
|
"logits/oppo_generated": -2.78233003616333, |
|
"logits/oppo_real": -2.810633420944214, |
|
"logits/real": -1.92085862159729, |
|
"logps/generated": -262.89617919921875, |
|
"logps/oppo_gen": -51.96064758300781, |
|
"logps/oppo_real": -160.8415069580078, |
|
"logps/real": -168.8170928955078, |
|
"loss": 0.2552, |
|
"loss/gen": 0.11834511160850525, |
|
"loss/real": 0.24989712238311768, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -210.93551635742188, |
|
"rewards/margins": 202.95993041992188, |
|
"rewards/real": -7.9755964279174805, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 48.57264161718894, |
|
"learning_rate": 4.74002849002849e-07, |
|
"logits/generated": -1.7602338790893555, |
|
"logits/oppo_generated": -2.7906460762023926, |
|
"logits/oppo_real": -2.7454147338867188, |
|
"logits/real": -1.9760286808013916, |
|
"logps/generated": -233.21107482910156, |
|
"logps/oppo_gen": -67.77021789550781, |
|
"logps/oppo_real": -355.9058837890625, |
|
"logps/real": -337.13189697265625, |
|
"loss": 0.3077, |
|
"loss/gen": 0.4533146023750305, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -165.44085693359375, |
|
"rewards/margins": 184.21485900878906, |
|
"rewards/real": 18.774002075195312, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"grad_norm": 83.06293794560996, |
|
"learning_rate": 4.7364672364672366e-07, |
|
"logits/generated": -1.7146689891815186, |
|
"logits/oppo_generated": -2.784420967102051, |
|
"logits/oppo_real": -2.58797550201416, |
|
"logits/real": -1.975735068321228, |
|
"logps/generated": -264.473388671875, |
|
"logps/oppo_gen": -53.4489631652832, |
|
"logps/oppo_real": -213.77337646484375, |
|
"logps/real": -224.57752990722656, |
|
"loss": 0.2609, |
|
"loss/gen": 0.06298929452896118, |
|
"loss/real": 0.2708776891231537, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -211.02444458007812, |
|
"rewards/margins": 200.22027587890625, |
|
"rewards/real": -10.804159164428711, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 54.351966206097366, |
|
"learning_rate": 4.7329059829059823e-07, |
|
"logits/generated": -1.9484028816223145, |
|
"logits/oppo_generated": -2.9693868160247803, |
|
"logits/oppo_real": -2.897064208984375, |
|
"logits/real": -2.2127110958099365, |
|
"logps/generated": -297.3500061035156, |
|
"logps/oppo_gen": -65.07535552978516, |
|
"logps/oppo_real": -380.3414306640625, |
|
"logps/real": -407.437255859375, |
|
"loss": 0.3122, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.37634211778640747, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -232.27462768554688, |
|
"rewards/margins": 205.17877197265625, |
|
"rewards/real": -27.095857620239258, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"grad_norm": 44.581477978090156, |
|
"learning_rate": 4.729344729344729e-07, |
|
"logits/generated": -2.0135183334350586, |
|
"logits/oppo_generated": -2.8074076175689697, |
|
"logits/oppo_real": -2.9744620323181152, |
|
"logits/real": -2.006603479385376, |
|
"logps/generated": -310.1141357421875, |
|
"logps/oppo_gen": -81.67523193359375, |
|
"logps/oppo_real": -332.10321044921875, |
|
"logps/real": -344.72015380859375, |
|
"loss": 0.2123, |
|
"loss/gen": 0.24567674100399017, |
|
"loss/real": 0.2426839917898178, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -228.43890380859375, |
|
"rewards/margins": 215.82196044921875, |
|
"rewards/real": -12.616944313049316, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 75.96934131814294, |
|
"learning_rate": 4.725783475783476e-07, |
|
"logits/generated": -1.9424694776535034, |
|
"logits/oppo_generated": -2.8780970573425293, |
|
"logits/oppo_real": -2.880333185195923, |
|
"logits/real": -2.094572067260742, |
|
"logps/generated": -384.4183654785156, |
|
"logps/oppo_gen": -83.72149658203125, |
|
"logps/oppo_real": -272.17291259765625, |
|
"logps/real": -253.05941772460938, |
|
"loss": 0.3365, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.005366437137126923, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -300.6968688964844, |
|
"rewards/margins": 319.81036376953125, |
|
"rewards/real": 19.113483428955078, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 68.25540843955152, |
|
"learning_rate": 4.722222222222222e-07, |
|
"logits/generated": -1.8966500759124756, |
|
"logits/oppo_generated": -2.8689210414886475, |
|
"logits/oppo_real": -3.036574602127075, |
|
"logits/real": -2.050349473953247, |
|
"logps/generated": -328.08782958984375, |
|
"logps/oppo_gen": -61.806739807128906, |
|
"logps/oppo_real": -213.864013671875, |
|
"logps/real": -201.69223022460938, |
|
"loss": 0.3216, |
|
"loss/gen": 0.027267932891845703, |
|
"loss/real": 0.0526232048869133, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -266.2810974121094, |
|
"rewards/margins": 278.45294189453125, |
|
"rewards/real": 12.171795845031738, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"grad_norm": 50.70511841227792, |
|
"learning_rate": 4.7186609686609683e-07, |
|
"logits/generated": -1.9891793727874756, |
|
"logits/oppo_generated": -2.847443103790283, |
|
"logits/oppo_real": -2.9110074043273926, |
|
"logits/real": -2.0313127040863037, |
|
"logps/generated": -321.2769775390625, |
|
"logps/oppo_gen": -68.70259857177734, |
|
"logps/oppo_real": -252.70947265625, |
|
"logps/real": -237.62449645996094, |
|
"loss": 0.2511, |
|
"loss/gen": 0.01610538363456726, |
|
"loss/real": 0.042245976626873016, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -252.5743865966797, |
|
"rewards/margins": 267.6593322753906, |
|
"rewards/real": 15.084959983825684, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 30.791028943166324, |
|
"learning_rate": 4.715099715099715e-07, |
|
"logits/generated": -2.0675950050354004, |
|
"logits/oppo_generated": -2.850525140762329, |
|
"logits/oppo_real": -2.9623799324035645, |
|
"logits/real": -2.093207359313965, |
|
"logps/generated": -307.7158203125, |
|
"logps/oppo_gen": -70.65492248535156, |
|
"logps/oppo_real": -241.07968139648438, |
|
"logps/real": -263.4491271972656, |
|
"loss": 0.2583, |
|
"loss/gen": 0.007858753204345703, |
|
"loss/real": 0.329021155834198, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -237.0609130859375, |
|
"rewards/margins": 214.69146728515625, |
|
"rewards/real": -22.36945152282715, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 50.498172538097855, |
|
"learning_rate": 4.711538461538461e-07, |
|
"logits/generated": -1.8775372505187988, |
|
"logits/oppo_generated": -2.760641574859619, |
|
"logits/oppo_real": -2.835960865020752, |
|
"logits/real": -2.0484185218811035, |
|
"logps/generated": -296.631591796875, |
|
"logps/oppo_gen": -77.80702209472656, |
|
"logps/oppo_real": -309.97265625, |
|
"logps/real": -322.2628173828125, |
|
"loss": 0.2541, |
|
"loss/gen": 0.1177206039428711, |
|
"loss/real": 0.30542153120040894, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -218.82455444335938, |
|
"rewards/margins": 206.5343475341797, |
|
"rewards/real": -12.290206909179688, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 43.8276807575525, |
|
"learning_rate": 4.707977207977208e-07, |
|
"logits/generated": -1.9596431255340576, |
|
"logits/oppo_generated": -2.762300491333008, |
|
"logits/oppo_real": -2.91391658782959, |
|
"logits/real": -2.064877986907959, |
|
"logps/generated": -285.3265075683594, |
|
"logps/oppo_gen": -79.30331420898438, |
|
"logps/oppo_real": -206.95407104492188, |
|
"logps/real": -199.25717163085938, |
|
"loss": 0.2071, |
|
"loss/gen": 0.15041278302669525, |
|
"loss/real": 0.06987226009368896, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -206.023193359375, |
|
"rewards/margins": 213.7200927734375, |
|
"rewards/real": 7.696885108947754, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 92.10083850391916, |
|
"learning_rate": 4.7044159544159537e-07, |
|
"logits/generated": -1.6970422267913818, |
|
"logits/oppo_generated": -2.8723740577697754, |
|
"logits/oppo_real": -2.730229139328003, |
|
"logits/real": -2.0857439041137695, |
|
"logps/generated": -367.9686584472656, |
|
"logps/oppo_gen": -68.4917984008789, |
|
"logps/oppo_real": -205.74790954589844, |
|
"logps/real": -213.32887268066406, |
|
"loss": 0.2593, |
|
"loss/gen": 0.01937010884284973, |
|
"loss/real": 0.20608964562416077, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -299.4768371582031, |
|
"rewards/margins": 291.8958740234375, |
|
"rewards/real": -7.580964088439941, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"grad_norm": 78.7925604237164, |
|
"learning_rate": 4.7008547008547005e-07, |
|
"logits/generated": -2.007908344268799, |
|
"logits/oppo_generated": -2.833265781402588, |
|
"logits/oppo_real": -2.8581643104553223, |
|
"logits/real": -2.1557552814483643, |
|
"logps/generated": -311.3591613769531, |
|
"logps/oppo_gen": -72.44357299804688, |
|
"logps/oppo_real": -294.85699462890625, |
|
"logps/real": -279.35943603515625, |
|
"loss": 0.2969, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0311884805560112, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -238.91558837890625, |
|
"rewards/margins": 254.41310119628906, |
|
"rewards/real": 15.497525215148926, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 68.66656869430713, |
|
"learning_rate": 4.697293447293447e-07, |
|
"logits/generated": -1.8297350406646729, |
|
"logits/oppo_generated": -2.8131227493286133, |
|
"logits/oppo_real": -2.815453052520752, |
|
"logits/real": -2.1473500728607178, |
|
"logps/generated": -365.8243408203125, |
|
"logps/oppo_gen": -118.46414184570312, |
|
"logps/oppo_real": -350.6376953125, |
|
"logps/real": -340.7218933105469, |
|
"loss": 0.2489, |
|
"loss/gen": 0.0096682608127594, |
|
"loss/real": 0.037821196019649506, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -247.36019897460938, |
|
"rewards/margins": 257.2760009765625, |
|
"rewards/real": 9.915809631347656, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"grad_norm": 41.412493633036036, |
|
"learning_rate": 4.6937321937321934e-07, |
|
"logits/generated": -1.9806835651397705, |
|
"logits/oppo_generated": -2.868478775024414, |
|
"logits/oppo_real": -2.87443208694458, |
|
"logits/real": -2.1723623275756836, |
|
"logps/generated": -303.59033203125, |
|
"logps/oppo_gen": -72.4801025390625, |
|
"logps/oppo_real": -315.2503356933594, |
|
"logps/real": -303.8700256347656, |
|
"loss": 0.2741, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.03125518560409546, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.11021423339844, |
|
"rewards/margins": 242.4905242919922, |
|
"rewards/real": 11.380315780639648, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 70.06525975831389, |
|
"learning_rate": 4.69017094017094e-07, |
|
"logits/generated": -1.5762176513671875, |
|
"logits/oppo_generated": -2.5010550022125244, |
|
"logits/oppo_real": -2.635188102722168, |
|
"logits/real": -1.7362252473831177, |
|
"logps/generated": -365.1429443359375, |
|
"logps/oppo_gen": -80.23007202148438, |
|
"logps/oppo_real": -347.019287109375, |
|
"logps/real": -397.1420593261719, |
|
"loss": 0.1983, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.6604471206665039, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -284.9128723144531, |
|
"rewards/margins": 234.7901153564453, |
|
"rewards/real": -50.12276840209961, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"grad_norm": 55.3230546956635, |
|
"learning_rate": 4.6866096866096864e-07, |
|
"logits/generated": -1.6325474977493286, |
|
"logits/oppo_generated": -2.6126623153686523, |
|
"logits/oppo_real": -2.6145567893981934, |
|
"logits/real": -1.8411592245101929, |
|
"logps/generated": -336.42913818359375, |
|
"logps/oppo_gen": -73.5291748046875, |
|
"logps/oppo_real": -317.5265808105469, |
|
"logps/real": -295.2335205078125, |
|
"loss": 0.2608, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.005345538258552551, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -262.89996337890625, |
|
"rewards/margins": 285.1929931640625, |
|
"rewards/real": 22.293060302734375, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 45.401364908412994, |
|
"learning_rate": 4.6830484330484326e-07, |
|
"logits/generated": -2.1946914196014404, |
|
"logits/oppo_generated": -3.0297465324401855, |
|
"logits/oppo_real": -3.101362705230713, |
|
"logits/real": -2.43679141998291, |
|
"logps/generated": -350.4021911621094, |
|
"logps/oppo_gen": -120.2161865234375, |
|
"logps/oppo_real": -532.0965576171875, |
|
"logps/real": -521.9387817382812, |
|
"loss": 0.3068, |
|
"loss/gen": 0.011398926377296448, |
|
"loss/real": 0.09254828840494156, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -230.1859893798828, |
|
"rewards/margins": 240.34373474121094, |
|
"rewards/real": 10.15774154663086, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 78.93660616576285, |
|
"learning_rate": 4.6794871794871794e-07, |
|
"logits/generated": -1.9267959594726562, |
|
"logits/oppo_generated": -2.4462087154388428, |
|
"logits/oppo_real": -2.882254123687744, |
|
"logits/real": -1.6094073057174683, |
|
"logps/generated": -312.116943359375, |
|
"logps/oppo_gen": -74.71348571777344, |
|
"logps/oppo_real": -324.086669921875, |
|
"logps/real": -350.8844909667969, |
|
"loss": 0.2236, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.3186296820640564, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -237.4034423828125, |
|
"rewards/margins": 210.60562133789062, |
|
"rewards/real": -26.79781723022461, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"grad_norm": 45.77649026025122, |
|
"learning_rate": 4.675925925925926e-07, |
|
"logits/generated": -2.0611488819122314, |
|
"logits/oppo_generated": -2.9427778720855713, |
|
"logits/oppo_real": -2.9869794845581055, |
|
"logits/real": -2.2055954933166504, |
|
"logps/generated": -256.5621337890625, |
|
"logps/oppo_gen": -57.98387908935547, |
|
"logps/oppo_real": -299.8202209472656, |
|
"logps/real": -324.31402587890625, |
|
"loss": 0.2659, |
|
"loss/gen": 0.2458263337612152, |
|
"loss/real": 0.35364583134651184, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -198.57826232910156, |
|
"rewards/margins": 174.08447265625, |
|
"rewards/real": -24.493789672851562, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 129.35351572821668, |
|
"learning_rate": 4.672364672364672e-07, |
|
"logits/generated": -1.9105026721954346, |
|
"logits/oppo_generated": -2.462200880050659, |
|
"logits/oppo_real": -2.7382378578186035, |
|
"logits/real": -1.603495478630066, |
|
"logps/generated": -283.0691223144531, |
|
"logps/oppo_gen": -109.31198120117188, |
|
"logps/oppo_real": -333.22021484375, |
|
"logps/real": -322.46685791015625, |
|
"loss": 0.291, |
|
"loss/gen": 0.48477140069007874, |
|
"loss/real": 0.011933863162994385, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -173.7571258544922, |
|
"rewards/margins": 184.51043701171875, |
|
"rewards/real": 10.753315925598145, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"grad_norm": 35.79489027671233, |
|
"learning_rate": 4.6688034188034186e-07, |
|
"logits/generated": -2.0710315704345703, |
|
"logits/oppo_generated": -2.9814329147338867, |
|
"logits/oppo_real": -2.8366198539733887, |
|
"logits/real": -2.283444404602051, |
|
"logps/generated": -375.65386962890625, |
|
"logps/oppo_gen": -117.97686767578125, |
|
"logps/oppo_real": -333.4208679199219, |
|
"logps/real": -320.87835693359375, |
|
"loss": 0.3409, |
|
"loss/gen": 0.10222794115543365, |
|
"loss/real": 0.14502938091754913, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -257.677001953125, |
|
"rewards/margins": 270.2195129394531, |
|
"rewards/real": 12.542512893676758, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 38.12187487191573, |
|
"learning_rate": 4.6652421652421653e-07, |
|
"logits/generated": -1.6588772535324097, |
|
"logits/oppo_generated": -2.6781723499298096, |
|
"logits/oppo_real": -2.516916513442993, |
|
"logits/real": -1.9674652814865112, |
|
"logps/generated": -311.58111572265625, |
|
"logps/oppo_gen": -60.19814682006836, |
|
"logps/oppo_real": -262.58551025390625, |
|
"logps/real": -258.6761474609375, |
|
"loss": 0.2715, |
|
"loss/gen": 0.24061748385429382, |
|
"loss/real": 0.12360702455043793, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -251.3829803466797, |
|
"rewards/margins": 255.29237365722656, |
|
"rewards/real": 3.9093809127807617, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 42.19599162224269, |
|
"learning_rate": 4.6616809116809116e-07, |
|
"logits/generated": -1.791245698928833, |
|
"logits/oppo_generated": -2.8787498474121094, |
|
"logits/oppo_real": -2.805894374847412, |
|
"logits/real": -2.2519092559814453, |
|
"logps/generated": -353.5828552246094, |
|
"logps/oppo_gen": -124.28936767578125, |
|
"logps/oppo_real": -606.1627807617188, |
|
"logps/real": -580.4054565429688, |
|
"loss": 0.2072, |
|
"loss/gen": 0.14101070165634155, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.29348754882812, |
|
"rewards/margins": 255.05081176757812, |
|
"rewards/real": 25.75733184814453, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"grad_norm": 26.621898201803482, |
|
"learning_rate": 4.658119658119658e-07, |
|
"logits/generated": -2.115086078643799, |
|
"logits/oppo_generated": -2.765538454055786, |
|
"logits/oppo_real": -2.839543342590332, |
|
"logits/real": -2.0567030906677246, |
|
"logps/generated": -295.31121826171875, |
|
"logps/oppo_gen": -83.72669982910156, |
|
"logps/oppo_real": -361.6756591796875, |
|
"logps/real": -362.77685546875, |
|
"loss": 0.2186, |
|
"loss/gen": 0.22105728089809418, |
|
"loss/real": 0.2065998762845993, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -211.5845184326172, |
|
"rewards/margins": 210.4833526611328, |
|
"rewards/real": -1.1011724472045898, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 18.935126127513485, |
|
"learning_rate": 4.654558404558404e-07, |
|
"logits/generated": -2.0748369693756104, |
|
"logits/oppo_generated": -2.7416014671325684, |
|
"logits/oppo_real": -2.8941569328308105, |
|
"logits/real": -1.9765853881835938, |
|
"logps/generated": -263.2115478515625, |
|
"logps/oppo_gen": -51.659912109375, |
|
"logps/oppo_real": -267.5926513671875, |
|
"logps/real": -248.9636688232422, |
|
"loss": 0.2529, |
|
"loss/gen": 0.011083722114562988, |
|
"loss/real": 0.0029998421669006348, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -211.55162048339844, |
|
"rewards/margins": 230.18063354492188, |
|
"rewards/real": 18.62899398803711, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"grad_norm": 99.21669289206076, |
|
"learning_rate": 4.650997150997151e-07, |
|
"logits/generated": -1.808034896850586, |
|
"logits/oppo_generated": -2.609920024871826, |
|
"logits/oppo_real": -2.5399818420410156, |
|
"logits/real": -1.9245736598968506, |
|
"logps/generated": -339.20257568359375, |
|
"logps/oppo_gen": -81.96345520019531, |
|
"logps/oppo_real": -258.99554443359375, |
|
"logps/real": -247.95123291015625, |
|
"loss": 0.3165, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.023126445710659027, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -257.2391357421875, |
|
"rewards/margins": 268.28338623046875, |
|
"rewards/real": 11.044289588928223, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 39.08625683834169, |
|
"learning_rate": 4.6474358974358975e-07, |
|
"logits/generated": -2.0943126678466797, |
|
"logits/oppo_generated": -2.89731502532959, |
|
"logits/oppo_real": -2.861166000366211, |
|
"logits/real": -2.182774543762207, |
|
"logps/generated": -280.51513671875, |
|
"logps/oppo_gen": -61.10588073730469, |
|
"logps/oppo_real": -297.8720703125, |
|
"logps/real": -302.93011474609375, |
|
"loss": 0.2691, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.24881529808044434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -219.40927124023438, |
|
"rewards/margins": 214.35122680664062, |
|
"rewards/real": -5.058034896850586, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"grad_norm": 36.347380649479064, |
|
"learning_rate": 4.643874643874643e-07, |
|
"logits/generated": -1.9154211282730103, |
|
"logits/oppo_generated": -2.8648695945739746, |
|
"logits/oppo_real": -2.711393356323242, |
|
"logits/real": -2.319422960281372, |
|
"logps/generated": -334.3785400390625, |
|
"logps/oppo_gen": -111.59371948242188, |
|
"logps/oppo_real": -521.255859375, |
|
"logps/real": -498.578857421875, |
|
"loss": 0.2889, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -222.78482055664062, |
|
"rewards/margins": 245.46185302734375, |
|
"rewards/real": 22.677024841308594, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 32.38887627937445, |
|
"learning_rate": 4.64031339031339e-07, |
|
"logits/generated": -2.1019668579101562, |
|
"logits/oppo_generated": -2.8064088821411133, |
|
"logits/oppo_real": -2.845989227294922, |
|
"logits/real": -2.034879446029663, |
|
"logps/generated": -286.20269775390625, |
|
"logps/oppo_gen": -52.78784942626953, |
|
"logps/oppo_real": -172.55088806152344, |
|
"logps/real": -191.98782348632812, |
|
"loss": 0.2025, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.3528357446193695, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -233.41485595703125, |
|
"rewards/margins": 213.9779052734375, |
|
"rewards/real": -19.43694305419922, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 40.25526611755675, |
|
"learning_rate": 4.6367521367521367e-07, |
|
"logits/generated": -2.0735549926757812, |
|
"logits/oppo_generated": -3.0264251232147217, |
|
"logits/oppo_real": -2.836057186126709, |
|
"logits/real": -2.4234282970428467, |
|
"logps/generated": -314.87933349609375, |
|
"logps/oppo_gen": -74.337158203125, |
|
"logps/oppo_real": -371.032470703125, |
|
"logps/real": -335.6429138183594, |
|
"loss": 0.1941, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -240.5421905517578, |
|
"rewards/margins": 275.9317321777344, |
|
"rewards/real": 35.38955307006836, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"grad_norm": 63.756242035094104, |
|
"learning_rate": 4.633190883190883e-07, |
|
"logits/generated": -2.2163453102111816, |
|
"logits/oppo_generated": -2.876476764678955, |
|
"logits/oppo_real": -2.912707805633545, |
|
"logits/real": -2.1965935230255127, |
|
"logps/generated": -285.357666015625, |
|
"logps/oppo_gen": -90.53692626953125, |
|
"logps/oppo_real": -383.74615478515625, |
|
"logps/real": -361.18463134765625, |
|
"loss": 0.2526, |
|
"loss/gen": 0.1636572778224945, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -194.82073974609375, |
|
"rewards/margins": 217.3822784423828, |
|
"rewards/real": 22.561546325683594, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 40.72590533351438, |
|
"learning_rate": 4.6296296296296297e-07, |
|
"logits/generated": -2.3514795303344727, |
|
"logits/oppo_generated": -2.9819746017456055, |
|
"logits/oppo_real": -3.1959123611450195, |
|
"logits/real": -2.3554928302764893, |
|
"logps/generated": -384.0709228515625, |
|
"logps/oppo_gen": -152.70217895507812, |
|
"logps/oppo_real": -483.54266357421875, |
|
"logps/real": -457.64434814453125, |
|
"loss": 0.2946, |
|
"loss/gen": 0.06552544236183167, |
|
"loss/real": 0.027411267161369324, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.36874389648438, |
|
"rewards/margins": 257.2670593261719, |
|
"rewards/real": 25.898303985595703, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"grad_norm": 43.21577996166808, |
|
"learning_rate": 4.626068376068376e-07, |
|
"logits/generated": -2.227564811706543, |
|
"logits/oppo_generated": -2.7378830909729004, |
|
"logits/oppo_real": -3.110536813735962, |
|
"logits/real": -2.0705103874206543, |
|
"logps/generated": -332.7789306640625, |
|
"logps/oppo_gen": -86.0918960571289, |
|
"logps/oppo_real": -447.7939147949219, |
|
"logps/real": -454.8052978515625, |
|
"loss": 0.3252, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.17898190021514893, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -246.68701171875, |
|
"rewards/margins": 239.67562866210938, |
|
"rewards/real": -7.011386871337891, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 49.99822018316627, |
|
"learning_rate": 4.622507122507122e-07, |
|
"logits/generated": -2.2606253623962402, |
|
"logits/oppo_generated": -2.7491419315338135, |
|
"logits/oppo_real": -3.191051483154297, |
|
"logits/real": -1.9832546710968018, |
|
"logps/generated": -342.39007568359375, |
|
"logps/oppo_gen": -96.26548767089844, |
|
"logps/oppo_real": -305.7531433105469, |
|
"logps/real": -301.7978820800781, |
|
"loss": 0.2736, |
|
"loss/gen": 0.2276860624551773, |
|
"loss/real": 0.10041482746601105, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -246.1245880126953, |
|
"rewards/margins": 250.07986450195312, |
|
"rewards/real": 3.9552855491638184, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"grad_norm": 55.35757344302584, |
|
"learning_rate": 4.618945868945869e-07, |
|
"logits/generated": -1.8759946823120117, |
|
"logits/oppo_generated": -2.8662476539611816, |
|
"logits/oppo_real": -2.7619881629943848, |
|
"logits/real": -2.2025985717773438, |
|
"logps/generated": -321.98046875, |
|
"logps/oppo_gen": -76.39656066894531, |
|
"logps/oppo_real": -342.36138916015625, |
|
"logps/real": -322.8648681640625, |
|
"loss": 0.302, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -245.58392333984375, |
|
"rewards/margins": 265.0804748535156, |
|
"rewards/real": 19.496538162231445, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 28.60285672945864, |
|
"learning_rate": 4.6153846153846156e-07, |
|
"logits/generated": -2.0779922008514404, |
|
"logits/oppo_generated": -2.973456859588623, |
|
"logits/oppo_real": -2.9541869163513184, |
|
"logits/real": -2.259559154510498, |
|
"logps/generated": -264.32720947265625, |
|
"logps/oppo_gen": -58.52758026123047, |
|
"logps/oppo_real": -196.6337127685547, |
|
"logps/real": -206.65615844726562, |
|
"loss": 0.2171, |
|
"loss/gen": 0.16810482740402222, |
|
"loss/real": 0.20324894785881042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -205.79962158203125, |
|
"rewards/margins": 195.77719116210938, |
|
"rewards/real": -10.022433280944824, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 34.86539756356759, |
|
"learning_rate": 4.6118233618233613e-07, |
|
"logits/generated": -2.0752716064453125, |
|
"logits/oppo_generated": -2.9579458236694336, |
|
"logits/oppo_real": -2.8345115184783936, |
|
"logits/real": -2.342416763305664, |
|
"logps/generated": -335.4797668457031, |
|
"logps/oppo_gen": -86.37559509277344, |
|
"logps/oppo_real": -329.4002685546875, |
|
"logps/real": -338.41485595703125, |
|
"loss": 0.2448, |
|
"loss/gen": 0.02429106831550598, |
|
"loss/real": 0.24715474247932434, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -249.10418701171875, |
|
"rewards/margins": 240.08961486816406, |
|
"rewards/real": -9.01455020904541, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"grad_norm": 38.988763217452316, |
|
"learning_rate": 4.608262108262108e-07, |
|
"logits/generated": -1.6155552864074707, |
|
"logits/oppo_generated": -2.4297678470611572, |
|
"logits/oppo_real": -2.5349526405334473, |
|
"logits/real": -1.6601029634475708, |
|
"logps/generated": -383.1607666015625, |
|
"logps/oppo_gen": -139.25880432128906, |
|
"logps/oppo_real": -366.9024658203125, |
|
"logps/real": -382.0296630859375, |
|
"loss": 0.2172, |
|
"loss/gen": 0.0033193975687026978, |
|
"loss/real": 0.2846258878707886, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -243.90196228027344, |
|
"rewards/margins": 228.77481079101562, |
|
"rewards/real": -15.12716293334961, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 48.59085099873484, |
|
"learning_rate": 4.6047008547008543e-07, |
|
"logits/generated": -1.6524815559387207, |
|
"logits/oppo_generated": -2.59027099609375, |
|
"logits/oppo_real": -2.5751681327819824, |
|
"logits/real": -1.8144464492797852, |
|
"logps/generated": -302.7337646484375, |
|
"logps/oppo_gen": -44.13750076293945, |
|
"logps/oppo_real": -126.39328002929688, |
|
"logps/real": -168.95840454101562, |
|
"loss": 0.2802, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.5524503588676453, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -258.5962829589844, |
|
"rewards/margins": 216.03115844726562, |
|
"rewards/real": -42.56513214111328, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 30.33221914180376, |
|
"learning_rate": 4.601139601139601e-07, |
|
"logits/generated": -1.917588710784912, |
|
"logits/oppo_generated": -2.8061888217926025, |
|
"logits/oppo_real": -2.885352611541748, |
|
"logits/real": -2.1351234912872314, |
|
"logps/generated": -359.21435546875, |
|
"logps/oppo_gen": -82.9956283569336, |
|
"logps/oppo_real": -287.7582702636719, |
|
"logps/real": -278.5579833984375, |
|
"loss": 0.1493, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.013260193169116974, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -276.21868896484375, |
|
"rewards/margins": 285.4189453125, |
|
"rewards/real": 9.200270652770996, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 41.78845855681551, |
|
"learning_rate": 4.5975783475783473e-07, |
|
"logits/generated": -1.7056605815887451, |
|
"logits/oppo_generated": -2.6804826259613037, |
|
"logits/oppo_real": -2.560675621032715, |
|
"logits/real": -2.0222690105438232, |
|
"logps/generated": -343.30615234375, |
|
"logps/oppo_gen": -125.20469665527344, |
|
"logps/oppo_real": -214.75454711914062, |
|
"logps/real": -228.00494384765625, |
|
"loss": 0.2917, |
|
"loss/gen": 0.05971069633960724, |
|
"loss/real": 0.2809779942035675, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -218.10145568847656, |
|
"rewards/margins": 204.85104370117188, |
|
"rewards/real": -13.250406265258789, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 40.09102040347923, |
|
"learning_rate": 4.5940170940170935e-07, |
|
"logits/generated": -1.9970121383666992, |
|
"logits/oppo_generated": -2.8161306381225586, |
|
"logits/oppo_real": -2.873737096786499, |
|
"logits/real": -2.104854106903076, |
|
"logps/generated": -301.53515625, |
|
"logps/oppo_gen": -39.4675178527832, |
|
"logps/oppo_real": -94.7720718383789, |
|
"logps/real": -111.1379165649414, |
|
"loss": 0.2653, |
|
"loss/gen": 0.4245451092720032, |
|
"loss/real": 0.26902616024017334, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -262.0676574707031, |
|
"rewards/margins": 245.70181274414062, |
|
"rewards/real": -16.365846633911133, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"grad_norm": 46.05510297204453, |
|
"learning_rate": 4.59045584045584e-07, |
|
"logits/generated": -1.9282357692718506, |
|
"logits/oppo_generated": -2.754338026046753, |
|
"logits/oppo_real": -2.6611428260803223, |
|
"logits/real": -2.1131625175476074, |
|
"logps/generated": -287.21087646484375, |
|
"logps/oppo_gen": -53.64311981201172, |
|
"logps/oppo_real": -189.60964965820312, |
|
"logps/real": -202.54542541503906, |
|
"loss": 0.2588, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.2690971791744232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -233.5677490234375, |
|
"rewards/margins": 220.6319580078125, |
|
"rewards/real": -12.93579387664795, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 24.115036245971748, |
|
"learning_rate": 4.586894586894587e-07, |
|
"logits/generated": -2.1603193283081055, |
|
"logits/oppo_generated": -2.8700437545776367, |
|
"logits/oppo_real": -3.012883186340332, |
|
"logits/real": -2.2183475494384766, |
|
"logps/generated": -321.8924560546875, |
|
"logps/oppo_gen": -64.43563842773438, |
|
"logps/oppo_real": -366.68572998046875, |
|
"logps/real": -341.61822509765625, |
|
"loss": 0.185, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0027311518788337708, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -257.45684814453125, |
|
"rewards/margins": 282.52435302734375, |
|
"rewards/real": 25.067520141601562, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"grad_norm": 44.438830907021085, |
|
"learning_rate": 4.5833333333333327e-07, |
|
"logits/generated": -1.8886809349060059, |
|
"logits/oppo_generated": -2.896176338195801, |
|
"logits/oppo_real": -2.7520911693573, |
|
"logits/real": -2.2607998847961426, |
|
"logps/generated": -359.7982177734375, |
|
"logps/oppo_gen": -94.6259765625, |
|
"logps/oppo_real": -329.9571533203125, |
|
"logps/real": -318.4111633300781, |
|
"loss": 0.2217, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.06740894168615341, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -265.1722717285156, |
|
"rewards/margins": 276.71826171875, |
|
"rewards/real": 11.545991897583008, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 36.220556469186114, |
|
"learning_rate": 4.5797720797720794e-07, |
|
"logits/generated": -2.0319433212280273, |
|
"logits/oppo_generated": -2.72526478767395, |
|
"logits/oppo_real": -2.760162591934204, |
|
"logits/real": -2.1123125553131104, |
|
"logps/generated": -314.482421875, |
|
"logps/oppo_gen": -70.71673583984375, |
|
"logps/oppo_real": -391.76458740234375, |
|
"logps/real": -421.2603454589844, |
|
"loss": 0.1594, |
|
"loss/gen": 0.0111636221408844, |
|
"loss/real": 0.3985538184642792, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -243.76568603515625, |
|
"rewards/margins": 214.2699737548828, |
|
"rewards/real": -29.495723724365234, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"grad_norm": 63.09100647204996, |
|
"learning_rate": 4.576210826210826e-07, |
|
"logits/generated": -2.4284703731536865, |
|
"logits/oppo_generated": -2.979785919189453, |
|
"logits/oppo_real": -3.2641677856445312, |
|
"logits/real": -2.3469204902648926, |
|
"logps/generated": -347.482421875, |
|
"logps/oppo_gen": -92.89317321777344, |
|
"logps/oppo_real": -330.3245849609375, |
|
"logps/real": -342.6152038574219, |
|
"loss": 0.2098, |
|
"loss/gen": 0.051213398575782776, |
|
"loss/real": 0.2405387908220291, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -254.58921813964844, |
|
"rewards/margins": 242.298583984375, |
|
"rewards/real": -12.290639877319336, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 61.13583908242775, |
|
"learning_rate": 4.5726495726495724e-07, |
|
"logits/generated": -1.760241985321045, |
|
"logits/oppo_generated": -2.775574207305908, |
|
"logits/oppo_real": -2.598371744155884, |
|
"logits/real": -2.1788487434387207, |
|
"logps/generated": -302.0619201660156, |
|
"logps/oppo_gen": -65.71693420410156, |
|
"logps/oppo_real": -220.19737243652344, |
|
"logps/real": -206.774658203125, |
|
"loss": 0.1849, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.006878167390823364, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -236.34498596191406, |
|
"rewards/margins": 249.7677001953125, |
|
"rewards/real": 13.422710418701172, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 30.892653140615913, |
|
"learning_rate": 4.569088319088319e-07, |
|
"logits/generated": -1.6592109203338623, |
|
"logits/oppo_generated": -2.6892812252044678, |
|
"logits/oppo_real": -2.527797222137451, |
|
"logits/real": -2.02337646484375, |
|
"logps/generated": -255.10365295410156, |
|
"logps/oppo_gen": -56.507102966308594, |
|
"logps/oppo_real": -203.99942016601562, |
|
"logps/real": -214.63226318359375, |
|
"loss": 0.2654, |
|
"loss/gen": 0.4655250906944275, |
|
"loss/real": 0.16440606117248535, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -198.5965576171875, |
|
"rewards/margins": 187.96371459960938, |
|
"rewards/real": -10.632861137390137, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"grad_norm": 29.88231515108999, |
|
"learning_rate": 4.5655270655270654e-07, |
|
"logits/generated": -2.1158018112182617, |
|
"logits/oppo_generated": -2.892515182495117, |
|
"logits/oppo_real": -2.87583589553833, |
|
"logits/real": -2.2688913345336914, |
|
"logps/generated": -284.7113342285156, |
|
"logps/oppo_gen": -70.63409423828125, |
|
"logps/oppo_real": -236.45480346679688, |
|
"logps/real": -220.61276245117188, |
|
"loss": 0.2059, |
|
"loss/gen": 0.16342338919639587, |
|
"loss/real": 0.02121652662754059, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -214.07723999023438, |
|
"rewards/margins": 229.91929626464844, |
|
"rewards/real": 15.842063903808594, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 38.31896420283467, |
|
"learning_rate": 4.5619658119658116e-07, |
|
"logits/generated": -1.7529704570770264, |
|
"logits/oppo_generated": -2.2372124195098877, |
|
"logits/oppo_real": -2.6531500816345215, |
|
"logits/real": -1.4958126544952393, |
|
"logps/generated": -241.34754943847656, |
|
"logps/oppo_gen": -49.9699821472168, |
|
"logps/oppo_real": -257.7629699707031, |
|
"logps/real": -258.93359375, |
|
"loss": 0.2438, |
|
"loss/gen": 0.5050817728042603, |
|
"loss/real": 0.21669313311576843, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -191.3775634765625, |
|
"rewards/margins": 190.20692443847656, |
|
"rewards/real": -1.170628547668457, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"grad_norm": 59.104889302435254, |
|
"learning_rate": 4.5584045584045584e-07, |
|
"logits/generated": -1.8979967832565308, |
|
"logits/oppo_generated": -2.6594979763031006, |
|
"logits/oppo_real": -2.72336483001709, |
|
"logits/real": -1.9622243642807007, |
|
"logps/generated": -281.2121887207031, |
|
"logps/oppo_gen": -69.47285461425781, |
|
"logps/oppo_real": -203.925048828125, |
|
"logps/real": -180.85427856445312, |
|
"loss": 0.2324, |
|
"loss/gen": 0.22617992758750916, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -211.7393341064453, |
|
"rewards/margins": 234.8101043701172, |
|
"rewards/real": 23.070770263671875, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 53.67125863911009, |
|
"learning_rate": 4.5548433048433046e-07, |
|
"logits/generated": -2.1446683406829834, |
|
"logits/oppo_generated": -2.84741473197937, |
|
"logits/oppo_real": -2.9322423934936523, |
|
"logits/real": -2.304154396057129, |
|
"logps/generated": -333.1722106933594, |
|
"logps/oppo_gen": -72.28129577636719, |
|
"logps/oppo_real": -342.0706787109375, |
|
"logps/real": -361.2486572265625, |
|
"loss": 0.2267, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.2817186713218689, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -260.8908996582031, |
|
"rewards/margins": 241.7129364013672, |
|
"rewards/real": -19.177961349487305, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 41.20307611629637, |
|
"learning_rate": 4.551282051282051e-07, |
|
"logits/generated": -2.2117128372192383, |
|
"logits/oppo_generated": -2.8123486042022705, |
|
"logits/oppo_real": -2.9484448432922363, |
|
"logits/real": -2.303635597229004, |
|
"logps/generated": -301.5762634277344, |
|
"logps/oppo_gen": -78.67784118652344, |
|
"logps/oppo_real": -224.94638061523438, |
|
"logps/real": -219.7537841796875, |
|
"loss": 0.2616, |
|
"loss/gen": 0.048871323466300964, |
|
"loss/real": 0.12874145805835724, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -222.8984375, |
|
"rewards/margins": 228.09103393554688, |
|
"rewards/real": 5.192612648010254, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"grad_norm": 64.1642332466071, |
|
"learning_rate": 4.5477207977207976e-07, |
|
"logits/generated": -2.0593137741088867, |
|
"logits/oppo_generated": -2.6430654525756836, |
|
"logits/oppo_real": -2.7417783737182617, |
|
"logits/real": -2.0025062561035156, |
|
"logps/generated": -283.02716064453125, |
|
"logps/oppo_gen": -63.871150970458984, |
|
"logps/oppo_real": -224.14703369140625, |
|
"logps/real": -193.3045654296875, |
|
"loss": 0.1827, |
|
"loss/gen": 0.320221483707428, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -219.15602111816406, |
|
"rewards/margins": 249.99851989746094, |
|
"rewards/real": 30.842487335205078, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 44.01251874910847, |
|
"learning_rate": 4.544159544159544e-07, |
|
"logits/generated": -2.1278610229492188, |
|
"logits/oppo_generated": -2.757966995239258, |
|
"logits/oppo_real": -2.906935691833496, |
|
"logits/real": -2.026312828063965, |
|
"logps/generated": -245.8863525390625, |
|
"logps/oppo_gen": -53.980133056640625, |
|
"logps/oppo_real": -168.99293518066406, |
|
"logps/real": -175.02117919921875, |
|
"loss": 0.1984, |
|
"loss/gen": 0.3063175082206726, |
|
"loss/real": 0.224711611866951, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -191.90621948242188, |
|
"rewards/margins": 185.87799072265625, |
|
"rewards/real": -6.028232574462891, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"grad_norm": 89.25379401512157, |
|
"learning_rate": 4.5405982905982905e-07, |
|
"logits/generated": -1.7046711444854736, |
|
"logits/oppo_generated": -2.34848690032959, |
|
"logits/oppo_real": -2.549453020095825, |
|
"logits/real": -1.6289647817611694, |
|
"logps/generated": -291.48760986328125, |
|
"logps/oppo_gen": -41.99907684326172, |
|
"logps/oppo_real": -137.05735778808594, |
|
"logps/real": -138.64942932128906, |
|
"loss": 0.2167, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.2769484519958496, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -249.48851013183594, |
|
"rewards/margins": 247.8964385986328, |
|
"rewards/real": -1.5920724868774414, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 85.32059828779624, |
|
"learning_rate": 4.537037037037037e-07, |
|
"logits/generated": -1.9084529876708984, |
|
"logits/oppo_generated": -2.5094847679138184, |
|
"logits/oppo_real": -2.6891722679138184, |
|
"logits/real": -1.900597095489502, |
|
"logps/generated": -450.51837158203125, |
|
"logps/oppo_gen": -68.40258026123047, |
|
"logps/oppo_real": -223.42794799804688, |
|
"logps/real": -218.4866943359375, |
|
"loss": 0.2427, |
|
"loss/gen": 0.2575632333755493, |
|
"loss/real": 0.20119953155517578, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -382.1158142089844, |
|
"rewards/margins": 387.05706787109375, |
|
"rewards/real": 4.941247940063477, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"grad_norm": 63.92922684439313, |
|
"learning_rate": 4.533475783475783e-07, |
|
"logits/generated": -1.9882678985595703, |
|
"logits/oppo_generated": -2.8935999870300293, |
|
"logits/oppo_real": -2.775484561920166, |
|
"logits/real": -2.3655059337615967, |
|
"logps/generated": -269.10772705078125, |
|
"logps/oppo_gen": -50.93283462524414, |
|
"logps/oppo_real": -316.0002136230469, |
|
"logps/real": -292.96356201171875, |
|
"loss": 0.2371, |
|
"loss/gen": 0.10934163630008698, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -218.1748809814453, |
|
"rewards/margins": 241.2115478515625, |
|
"rewards/real": 23.036657333374023, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 109.75823929778575, |
|
"learning_rate": 4.5299145299145297e-07, |
|
"logits/generated": -2.2796077728271484, |
|
"logits/oppo_generated": -2.8526816368103027, |
|
"logits/oppo_real": -3.2386014461517334, |
|
"logits/real": -2.2865777015686035, |
|
"logps/generated": -342.9628601074219, |
|
"logps/oppo_gen": -113.54923248291016, |
|
"logps/oppo_real": -351.7125549316406, |
|
"logps/real": -339.4488220214844, |
|
"loss": 0.2334, |
|
"loss/gen": 0.26814204454421997, |
|
"loss/real": 0.026034392416477203, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -229.4136199951172, |
|
"rewards/margins": 241.67733764648438, |
|
"rewards/real": 12.263713836669922, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 98.73111963497122, |
|
"learning_rate": 4.5263532763532765e-07, |
|
"logits/generated": -2.3369998931884766, |
|
"logits/oppo_generated": -2.9850940704345703, |
|
"logits/oppo_real": -3.0315611362457275, |
|
"logits/real": -2.358889102935791, |
|
"logps/generated": -314.4837341308594, |
|
"logps/oppo_gen": -61.65489196777344, |
|
"logps/oppo_real": -151.10653686523438, |
|
"logps/real": -144.9267578125, |
|
"loss": 0.2096, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.1058526411652565, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -252.82882690429688, |
|
"rewards/margins": 259.00860595703125, |
|
"rewards/real": 6.179767608642578, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"grad_norm": 43.88860577412861, |
|
"learning_rate": 4.522792022792022e-07, |
|
"logits/generated": -2.205157995223999, |
|
"logits/oppo_generated": -2.891350746154785, |
|
"logits/oppo_real": -3.0990657806396484, |
|
"logits/real": -2.274484634399414, |
|
"logps/generated": -485.29437255859375, |
|
"logps/oppo_gen": -212.02532958984375, |
|
"logps/oppo_real": -549.8078002929688, |
|
"logps/real": -530.3729248046875, |
|
"loss": 0.2064, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.04228302091360092, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.2690734863281, |
|
"rewards/margins": 292.70391845703125, |
|
"rewards/real": 19.434844970703125, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 31.48113836496238, |
|
"learning_rate": 4.519230769230769e-07, |
|
"logits/generated": -1.9153656959533691, |
|
"logits/oppo_generated": -2.861656904220581, |
|
"logits/oppo_real": -2.749734878540039, |
|
"logits/real": -2.23591947555542, |
|
"logps/generated": -283.47076416015625, |
|
"logps/oppo_gen": -52.08341598510742, |
|
"logps/oppo_real": -268.2560119628906, |
|
"logps/real": -241.11734008789062, |
|
"loss": 0.146, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.38734436035156, |
|
"rewards/margins": 258.5260314941406, |
|
"rewards/real": 27.138673782348633, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"grad_norm": 55.337732592124695, |
|
"learning_rate": 4.5156695156695157e-07, |
|
"logits/generated": -2.0015411376953125, |
|
"logits/oppo_generated": -2.8331031799316406, |
|
"logits/oppo_real": -2.8462958335876465, |
|
"logits/real": -2.170417308807373, |
|
"logps/generated": -387.2781677246094, |
|
"logps/oppo_gen": -78.92254638671875, |
|
"logps/oppo_real": -224.86373901367188, |
|
"logps/real": -244.69886779785156, |
|
"loss": 0.2824, |
|
"loss/gen": 0.0006727427244186401, |
|
"loss/real": 0.33926331996917725, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -308.3556213378906, |
|
"rewards/margins": 288.5204772949219, |
|
"rewards/real": -19.835121154785156, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 143.149881843734, |
|
"learning_rate": 4.512108262108262e-07, |
|
"logits/generated": -2.24777889251709, |
|
"logits/oppo_generated": -2.879185199737549, |
|
"logits/oppo_real": -2.873112678527832, |
|
"logits/real": -2.3650031089782715, |
|
"logps/generated": -310.72698974609375, |
|
"logps/oppo_gen": -49.27460479736328, |
|
"logps/oppo_real": -375.43463134765625, |
|
"logps/real": -367.69134521484375, |
|
"loss": 0.2742, |
|
"loss/gen": 0.003482311964035034, |
|
"loss/real": 0.10677148401737213, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -261.4523620605469, |
|
"rewards/margins": 269.1956481933594, |
|
"rewards/real": 7.7432966232299805, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 105.91241867546742, |
|
"learning_rate": 4.5085470085470087e-07, |
|
"logits/generated": -2.244475841522217, |
|
"logits/oppo_generated": -3.0462043285369873, |
|
"logits/oppo_real": -3.1089582443237305, |
|
"logits/real": -2.443125009536743, |
|
"logps/generated": -331.522705078125, |
|
"logps/oppo_gen": -77.79332733154297, |
|
"logps/oppo_real": -319.2231750488281, |
|
"logps/real": -294.63909912109375, |
|
"loss": 0.2243, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -253.72940063476562, |
|
"rewards/margins": 278.3134765625, |
|
"rewards/real": 24.58407974243164, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"grad_norm": 39.86911416934715, |
|
"learning_rate": 4.5049857549857543e-07, |
|
"logits/generated": -2.201946973800659, |
|
"logits/oppo_generated": -2.815687656402588, |
|
"logits/oppo_real": -2.9501237869262695, |
|
"logits/real": -2.219613552093506, |
|
"logps/generated": -352.42193603515625, |
|
"logps/oppo_gen": -103.51431274414062, |
|
"logps/oppo_real": -308.8333435058594, |
|
"logps/real": -304.56707763671875, |
|
"loss": 0.2701, |
|
"loss/gen": 0.01318824291229248, |
|
"loss/real": 0.14666341245174408, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -248.90762329101562, |
|
"rewards/margins": 253.17388916015625, |
|
"rewards/real": 4.266262054443359, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 52.815620285042094, |
|
"learning_rate": 4.501424501424501e-07, |
|
"logits/generated": -2.0335657596588135, |
|
"logits/oppo_generated": -2.779146194458008, |
|
"logits/oppo_real": -2.8336267471313477, |
|
"logits/real": -2.1648244857788086, |
|
"logps/generated": -302.6240234375, |
|
"logps/oppo_gen": -72.71639251708984, |
|
"logps/oppo_real": -196.57557678222656, |
|
"logps/real": -177.44334411621094, |
|
"loss": 0.1641, |
|
"loss/gen": 0.03493678569793701, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.90762329101562, |
|
"rewards/margins": 249.03985595703125, |
|
"rewards/real": 19.132234573364258, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 53.27667685652888, |
|
"learning_rate": 4.497863247863248e-07, |
|
"logits/generated": -2.264275074005127, |
|
"logits/oppo_generated": -2.8425636291503906, |
|
"logits/oppo_real": -2.9093685150146484, |
|
"logits/real": -2.2906460762023926, |
|
"logps/generated": -324.7326354980469, |
|
"logps/oppo_gen": -95.93893432617188, |
|
"logps/oppo_real": -207.11392211914062, |
|
"logps/real": -191.6349334716797, |
|
"loss": 0.2242, |
|
"loss/gen": 0.2739104628562927, |
|
"loss/real": 0.014250755310058594, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -228.793701171875, |
|
"rewards/margins": 244.272705078125, |
|
"rewards/real": 15.478999137878418, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 71.11992993242998, |
|
"learning_rate": 4.494301994301994e-07, |
|
"logits/generated": -2.115266799926758, |
|
"logits/oppo_generated": -2.8224010467529297, |
|
"logits/oppo_real": -2.778409957885742, |
|
"logits/real": -2.3558645248413086, |
|
"logps/generated": -344.7413024902344, |
|
"logps/oppo_gen": -88.16463470458984, |
|
"logps/oppo_real": -239.9169921875, |
|
"logps/real": -255.3130645751953, |
|
"loss": 0.2213, |
|
"loss/gen": 0.029619291424751282, |
|
"loss/real": 0.297105997800827, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -256.57666015625, |
|
"rewards/margins": 241.18060302734375, |
|
"rewards/real": -15.396068572998047, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"grad_norm": 86.04380073271548, |
|
"learning_rate": 4.4907407407407403e-07, |
|
"logits/generated": -2.3114571571350098, |
|
"logits/oppo_generated": -2.9657952785491943, |
|
"logits/oppo_real": -2.9425137042999268, |
|
"logits/real": -2.491687059402466, |
|
"logps/generated": -314.3035583496094, |
|
"logps/oppo_gen": -76.42547607421875, |
|
"logps/oppo_real": -261.8043518066406, |
|
"logps/real": -245.9702911376953, |
|
"loss": 0.3062, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -237.87808227539062, |
|
"rewards/margins": 253.712158203125, |
|
"rewards/real": 15.834070205688477, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 106.78727210850083, |
|
"learning_rate": 4.487179487179487e-07, |
|
"logits/generated": -1.8033708333969116, |
|
"logits/oppo_generated": -2.6656646728515625, |
|
"logits/oppo_real": -2.512063980102539, |
|
"logits/real": -2.0389981269836426, |
|
"logps/generated": -216.2008056640625, |
|
"logps/oppo_gen": -61.16596603393555, |
|
"logps/oppo_real": -89.70797729492188, |
|
"logps/real": -71.5185317993164, |
|
"loss": 0.1824, |
|
"loss/gen": 0.8215519785881042, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -155.03482055664062, |
|
"rewards/margins": 173.22427368164062, |
|
"rewards/real": 18.189449310302734, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 101.12620711578676, |
|
"learning_rate": 4.4836182336182333e-07, |
|
"logits/generated": -1.9014736413955688, |
|
"logits/oppo_generated": -2.679591655731201, |
|
"logits/oppo_real": -2.5152084827423096, |
|
"logits/real": -2.1934709548950195, |
|
"logps/generated": -450.9914855957031, |
|
"logps/oppo_gen": -134.39280700683594, |
|
"logps/oppo_real": -353.8466491699219, |
|
"logps/real": -347.7996520996094, |
|
"loss": 0.2277, |
|
"loss/gen": 0.11133264005184174, |
|
"loss/real": 0.03321786969900131, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -316.59869384765625, |
|
"rewards/margins": 322.64569091796875, |
|
"rewards/real": 6.047005653381348, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"grad_norm": 35.871571659467506, |
|
"learning_rate": 4.48005698005698e-07, |
|
"logits/generated": -2.210495710372925, |
|
"logits/oppo_generated": -2.8852622509002686, |
|
"logits/oppo_real": -2.9888343811035156, |
|
"logits/real": -2.316572427749634, |
|
"logps/generated": -349.71429443359375, |
|
"logps/oppo_gen": -86.57408142089844, |
|
"logps/oppo_real": -353.78594970703125, |
|
"logps/real": -361.350830078125, |
|
"loss": 0.1252, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.25374865531921387, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -263.14019775390625, |
|
"rewards/margins": 255.5753631591797, |
|
"rewards/real": -7.564859867095947, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 30.56805032519401, |
|
"learning_rate": 4.476495726495726e-07, |
|
"logits/generated": -2.2187647819519043, |
|
"logits/oppo_generated": -2.894904136657715, |
|
"logits/oppo_real": -2.8833250999450684, |
|
"logits/real": -2.351996421813965, |
|
"logps/generated": -333.04010009765625, |
|
"logps/oppo_gen": -97.552490234375, |
|
"logps/oppo_real": -446.60357666015625, |
|
"logps/real": -427.14239501953125, |
|
"loss": 0.1785, |
|
"loss/gen": 0.018305152654647827, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -235.4876251220703, |
|
"rewards/margins": 254.9488067626953, |
|
"rewards/real": 19.46118927001953, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"grad_norm": 65.2351178849916, |
|
"learning_rate": 4.4729344729344725e-07, |
|
"logits/generated": -2.244483709335327, |
|
"logits/oppo_generated": -2.9238195419311523, |
|
"logits/oppo_real": -2.928109645843506, |
|
"logits/real": -2.411482810974121, |
|
"logps/generated": -433.5346984863281, |
|
"logps/oppo_gen": -99.34373474121094, |
|
"logps/oppo_real": -381.1275634765625, |
|
"logps/real": -367.78045654296875, |
|
"loss": 0.2426, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.021128714084625244, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -334.19097900390625, |
|
"rewards/margins": 347.5380859375, |
|
"rewards/real": 13.347097396850586, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 73.61953834262512, |
|
"learning_rate": 4.469373219373219e-07, |
|
"logits/generated": -1.8134526014328003, |
|
"logits/oppo_generated": -2.7080626487731934, |
|
"logits/oppo_real": -2.5767087936401367, |
|
"logits/real": -2.030604124069214, |
|
"logps/generated": -408.92938232421875, |
|
"logps/oppo_gen": -46.502037048339844, |
|
"logps/oppo_real": -149.05059814453125, |
|
"logps/real": -196.5388641357422, |
|
"loss": 0.2718, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.5971862077713013, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -362.4273376464844, |
|
"rewards/margins": 314.9390869140625, |
|
"rewards/real": -47.488250732421875, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 37.51273384059488, |
|
"learning_rate": 4.465811965811966e-07, |
|
"logits/generated": -2.0432450771331787, |
|
"logits/oppo_generated": -2.9217922687530518, |
|
"logits/oppo_real": -3.0358145236968994, |
|
"logits/real": -2.355529546737671, |
|
"logps/generated": -332.09490966796875, |
|
"logps/oppo_gen": -72.13301849365234, |
|
"logps/oppo_real": -295.51861572265625, |
|
"logps/real": -308.53802490234375, |
|
"loss": 0.2465, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.3299695551395416, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -259.96185302734375, |
|
"rewards/margins": 246.94244384765625, |
|
"rewards/real": -13.019420623779297, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"grad_norm": 54.30798222723571, |
|
"learning_rate": 4.4622507122507117e-07, |
|
"logits/generated": -2.0533862113952637, |
|
"logits/oppo_generated": -2.7406344413757324, |
|
"logits/oppo_real": -2.799593925476074, |
|
"logits/real": -2.181865930557251, |
|
"logps/generated": -351.9316101074219, |
|
"logps/oppo_gen": -102.60955810546875, |
|
"logps/oppo_real": -305.8299255371094, |
|
"logps/real": -278.4617004394531, |
|
"loss": 0.1727, |
|
"loss/gen": 0.01339229941368103, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -249.32205200195312, |
|
"rewards/margins": 276.6903076171875, |
|
"rewards/real": 27.368215560913086, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 31.494153645830643, |
|
"learning_rate": 4.4586894586894584e-07, |
|
"logits/generated": -2.32715106010437, |
|
"logits/oppo_generated": -2.8220396041870117, |
|
"logits/oppo_real": -3.0663821697235107, |
|
"logits/real": -2.326672077178955, |
|
"logps/generated": -296.7817077636719, |
|
"logps/oppo_gen": -80.95722961425781, |
|
"logps/oppo_real": -339.0364074707031, |
|
"logps/real": -326.7790832519531, |
|
"loss": 0.1931, |
|
"loss/gen": 0.2630905210971832, |
|
"loss/real": 0.00023437291383743286, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -215.82449340820312, |
|
"rewards/margins": 228.08184814453125, |
|
"rewards/real": 12.25734806060791, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"grad_norm": 54.0069326966166, |
|
"learning_rate": 4.455128205128205e-07, |
|
"logits/generated": -2.081082344055176, |
|
"logits/oppo_generated": -2.8528313636779785, |
|
"logits/oppo_real": -2.9469070434570312, |
|
"logits/real": -2.2097737789154053, |
|
"logps/generated": -291.0604248046875, |
|
"logps/oppo_gen": -55.95906066894531, |
|
"logps/oppo_real": -228.37322998046875, |
|
"logps/real": -207.25762939453125, |
|
"loss": 0.185, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -235.10134887695312, |
|
"rewards/margins": 256.2169494628906, |
|
"rewards/real": 21.115596771240234, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 49.09352870335229, |
|
"learning_rate": 4.4515669515669514e-07, |
|
"logits/generated": -2.103522539138794, |
|
"logits/oppo_generated": -2.759657859802246, |
|
"logits/oppo_real": -2.7739434242248535, |
|
"logits/real": -2.251528263092041, |
|
"logps/generated": -327.42047119140625, |
|
"logps/oppo_gen": -55.900001525878906, |
|
"logps/oppo_real": -240.51673889160156, |
|
"logps/real": -249.0882568359375, |
|
"loss": 0.197, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.29675740003585815, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -271.5204772949219, |
|
"rewards/margins": 262.9489440917969, |
|
"rewards/real": -8.571529388427734, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"grad_norm": 51.20193296598406, |
|
"learning_rate": 4.448005698005698e-07, |
|
"logits/generated": -2.0290396213531494, |
|
"logits/oppo_generated": -2.714049816131592, |
|
"logits/oppo_real": -2.821863889694214, |
|
"logits/real": -2.0799052715301514, |
|
"logps/generated": -250.7839813232422, |
|
"logps/oppo_gen": -61.66150665283203, |
|
"logps/oppo_real": -281.81561279296875, |
|
"logps/real": -291.30224609375, |
|
"loss": 0.2763, |
|
"loss/gen": 0.3449553847312927, |
|
"loss/real": 0.29574069380760193, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -189.12246704101562, |
|
"rewards/margins": 179.63583374023438, |
|
"rewards/real": -9.486623764038086, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 61.67454112871453, |
|
"learning_rate": 4.444444444444444e-07, |
|
"logits/generated": -1.8745107650756836, |
|
"logits/oppo_generated": -2.7336645126342773, |
|
"logits/oppo_real": -2.6636435985565186, |
|
"logits/real": -2.19765043258667, |
|
"logps/generated": -274.8359375, |
|
"logps/oppo_gen": -66.04891204833984, |
|
"logps/oppo_real": -343.6158447265625, |
|
"logps/real": -310.6270751953125, |
|
"loss": 0.2015, |
|
"loss/gen": 0.22997678816318512, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -208.78701782226562, |
|
"rewards/margins": 241.7758026123047, |
|
"rewards/real": 32.98878479003906, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 53.72602799374224, |
|
"learning_rate": 4.4408831908831906e-07, |
|
"logits/generated": -2.1320905685424805, |
|
"logits/oppo_generated": -3.0542874336242676, |
|
"logits/oppo_real": -2.803119659423828, |
|
"logits/real": -2.5108633041381836, |
|
"logps/generated": -288.25958251953125, |
|
"logps/oppo_gen": -81.553955078125, |
|
"logps/oppo_real": -376.17071533203125, |
|
"logps/real": -342.6852722167969, |
|
"loss": 0.2462, |
|
"loss/gen": 0.2779223918914795, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -206.70559692382812, |
|
"rewards/margins": 240.19102478027344, |
|
"rewards/real": 33.485435485839844, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"grad_norm": 25.261345155831098, |
|
"learning_rate": 4.4373219373219373e-07, |
|
"logits/generated": -2.1422460079193115, |
|
"logits/oppo_generated": -2.791293144226074, |
|
"logits/oppo_real": -2.8689441680908203, |
|
"logits/real": -2.3548340797424316, |
|
"logps/generated": -355.26690673828125, |
|
"logps/oppo_gen": -90.10079956054688, |
|
"logps/oppo_real": -387.6597900390625, |
|
"logps/real": -367.0665588378906, |
|
"loss": 0.2392, |
|
"loss/gen": 0.17274703085422516, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -265.1661071777344, |
|
"rewards/margins": 285.75933837890625, |
|
"rewards/real": 20.593231201171875, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 29.314516563430757, |
|
"learning_rate": 4.4337606837606836e-07, |
|
"logits/generated": -2.09847354888916, |
|
"logits/oppo_generated": -2.8356850147247314, |
|
"logits/oppo_real": -2.917833089828491, |
|
"logits/real": -2.1983418464660645, |
|
"logps/generated": -326.11285400390625, |
|
"logps/oppo_gen": -76.40264892578125, |
|
"logps/oppo_real": -278.172607421875, |
|
"logps/real": -253.1783447265625, |
|
"loss": 0.1414, |
|
"loss/gen": 0.12514609098434448, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -249.710205078125, |
|
"rewards/margins": 274.7044677734375, |
|
"rewards/real": 24.9942684173584, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"grad_norm": 48.53973837148151, |
|
"learning_rate": 4.43019943019943e-07, |
|
"logits/generated": -2.2969937324523926, |
|
"logits/oppo_generated": -3.0011539459228516, |
|
"logits/oppo_real": -3.069876194000244, |
|
"logits/real": -2.477539539337158, |
|
"logps/generated": -303.3049011230469, |
|
"logps/oppo_gen": -69.13575744628906, |
|
"logps/oppo_real": -340.70343017578125, |
|
"logps/real": -357.84661865234375, |
|
"loss": 0.2047, |
|
"loss/gen": 0.19307354092597961, |
|
"loss/real": 0.3465298116207123, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -234.16915893554688, |
|
"rewards/margins": 217.02597045898438, |
|
"rewards/real": -17.143173217773438, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 40.12700023003137, |
|
"learning_rate": 4.4266381766381765e-07, |
|
"logits/generated": -2.04579758644104, |
|
"logits/oppo_generated": -2.821411609649658, |
|
"logits/oppo_real": -2.9697532653808594, |
|
"logits/real": -2.3006458282470703, |
|
"logps/generated": -352.8709411621094, |
|
"logps/oppo_gen": -94.25292205810547, |
|
"logps/oppo_real": -449.1705322265625, |
|
"logps/real": -422.84283447265625, |
|
"loss": 0.134, |
|
"loss/gen": 0.19948835670948029, |
|
"loss/real": 0.013253934681415558, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -258.6180419921875, |
|
"rewards/margins": 284.94573974609375, |
|
"rewards/real": 26.32770347595215, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 55.41589471188524, |
|
"learning_rate": 4.423076923076923e-07, |
|
"logits/generated": -2.029297351837158, |
|
"logits/oppo_generated": -2.9498441219329834, |
|
"logits/oppo_real": -2.889374017715454, |
|
"logits/real": -2.3880996704101562, |
|
"logps/generated": -370.95904541015625, |
|
"logps/oppo_gen": -93.28401184082031, |
|
"logps/oppo_real": -446.9027099609375, |
|
"logps/real": -425.0364074707031, |
|
"loss": 0.1887, |
|
"loss/gen": 0.0018385052680969238, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -277.6750183105469, |
|
"rewards/margins": 299.541259765625, |
|
"rewards/real": 21.866281509399414, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 56.23495458509325, |
|
"learning_rate": 4.4195156695156695e-07, |
|
"logits/generated": -1.6252273321151733, |
|
"logits/oppo_generated": -2.5877699851989746, |
|
"logits/oppo_real": -2.4145617485046387, |
|
"logits/real": -1.9977871179580688, |
|
"logps/generated": -345.75006103515625, |
|
"logps/oppo_gen": -58.147544860839844, |
|
"logps/oppo_real": -256.63494873046875, |
|
"logps/real": -243.7563018798828, |
|
"loss": 0.2266, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.011003687977790833, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -287.6025085449219, |
|
"rewards/margins": 300.481201171875, |
|
"rewards/real": 12.878662109375, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 66.24101870135868, |
|
"learning_rate": 4.4159544159544157e-07, |
|
"logits/generated": -2.048314094543457, |
|
"logits/oppo_generated": -2.825096607208252, |
|
"logits/oppo_real": -2.919394016265869, |
|
"logits/real": -2.1845545768737793, |
|
"logps/generated": -257.5550842285156, |
|
"logps/oppo_gen": -62.71122360229492, |
|
"logps/oppo_real": -234.44354248046875, |
|
"logps/real": -217.26864624023438, |
|
"loss": 0.1972, |
|
"loss/gen": 0.24772684276103973, |
|
"loss/real": 0.03300228714942932, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -194.84384155273438, |
|
"rewards/margins": 212.01873779296875, |
|
"rewards/real": 17.17490005493164, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"grad_norm": 45.537051363422734, |
|
"learning_rate": 4.412393162393162e-07, |
|
"logits/generated": -2.0483438968658447, |
|
"logits/oppo_generated": -2.681910276412964, |
|
"logits/oppo_real": -2.8930723667144775, |
|
"logits/real": -2.0234827995300293, |
|
"logps/generated": -298.2679138183594, |
|
"logps/oppo_gen": -69.35714721679688, |
|
"logps/oppo_real": -321.68878173828125, |
|
"logps/real": -300.1239013671875, |
|
"loss": 0.1831, |
|
"loss/gen": 0.39896392822265625, |
|
"loss/real": 0.027880370616912842, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -228.91075134277344, |
|
"rewards/margins": 250.47564697265625, |
|
"rewards/real": 21.564884185791016, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 51.88816192212102, |
|
"learning_rate": 4.4088319088319087e-07, |
|
"logits/generated": -1.9750864505767822, |
|
"logits/oppo_generated": -2.910146951675415, |
|
"logits/oppo_real": -2.842686653137207, |
|
"logits/real": -2.2580361366271973, |
|
"logps/generated": -364.1310119628906, |
|
"logps/oppo_gen": -55.29602813720703, |
|
"logps/oppo_real": -188.457763671875, |
|
"logps/real": -190.8536834716797, |
|
"loss": 0.1574, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.2326948642730713, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -308.8349609375, |
|
"rewards/margins": 306.4390869140625, |
|
"rewards/real": -2.3959202766418457, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"grad_norm": 49.47076354082783, |
|
"learning_rate": 4.4052706552706555e-07, |
|
"logits/generated": -2.08099365234375, |
|
"logits/oppo_generated": -2.9482345581054688, |
|
"logits/oppo_real": -3.0109448432922363, |
|
"logits/real": -2.36570405960083, |
|
"logps/generated": -295.0722961425781, |
|
"logps/oppo_gen": -70.6409912109375, |
|
"logps/oppo_real": -375.189697265625, |
|
"logps/real": -361.89434814453125, |
|
"loss": 0.1832, |
|
"loss/gen": 0.013277322053909302, |
|
"loss/real": 0.03908447176218033, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -224.4313201904297, |
|
"rewards/margins": 237.72665405273438, |
|
"rewards/real": 13.295326232910156, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 23.347891198939145, |
|
"learning_rate": 4.4017094017094017e-07, |
|
"logits/generated": -2.0084404945373535, |
|
"logits/oppo_generated": -2.7811834812164307, |
|
"logits/oppo_real": -2.923962116241455, |
|
"logits/real": -2.1404595375061035, |
|
"logps/generated": -311.7547607421875, |
|
"logps/oppo_gen": -71.71026611328125, |
|
"logps/oppo_real": -353.846923828125, |
|
"logps/real": -363.43988037109375, |
|
"loss": 0.1818, |
|
"loss/gen": 0.030606284737586975, |
|
"loss/real": 0.2629862129688263, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -240.0445098876953, |
|
"rewards/margins": 230.45156860351562, |
|
"rewards/real": -9.592939376831055, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 19.88992382650619, |
|
"learning_rate": 4.398148148148148e-07, |
|
"logits/generated": -2.0685057640075684, |
|
"logits/oppo_generated": -2.8043360710144043, |
|
"logits/oppo_real": -3.0211949348449707, |
|
"logits/real": -2.244368314743042, |
|
"logps/generated": -308.2628173828125, |
|
"logps/oppo_gen": -77.71004486083984, |
|
"logps/oppo_real": -389.77301025390625, |
|
"logps/real": -367.9434509277344, |
|
"loss": 0.1625, |
|
"loss/gen": 0.025741413235664368, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -230.55276489257812, |
|
"rewards/margins": 252.38232421875, |
|
"rewards/real": 21.829570770263672, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"grad_norm": 25.132100938015384, |
|
"learning_rate": 4.394586894586894e-07, |
|
"logits/generated": -1.9696589708328247, |
|
"logits/oppo_generated": -2.7760987281799316, |
|
"logits/oppo_real": -2.740163803100586, |
|
"logits/real": -2.227613925933838, |
|
"logps/generated": -362.2425231933594, |
|
"logps/oppo_gen": -88.69313049316406, |
|
"logps/oppo_real": -338.8006591796875, |
|
"logps/real": -327.68731689453125, |
|
"loss": 0.1775, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.09456821531057358, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.5494384765625, |
|
"rewards/margins": 284.6627502441406, |
|
"rewards/real": 11.113346099853516, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 79.51874594730006, |
|
"learning_rate": 4.391025641025641e-07, |
|
"logits/generated": -1.8997169733047485, |
|
"logits/oppo_generated": -2.7127938270568848, |
|
"logits/oppo_real": -2.803234577178955, |
|
"logits/real": -2.1146082878112793, |
|
"logps/generated": -387.34759521484375, |
|
"logps/oppo_gen": -85.75541687011719, |
|
"logps/oppo_real": -242.4071807861328, |
|
"logps/real": -236.056884765625, |
|
"loss": 0.1955, |
|
"loss/gen": 0.0035225003957748413, |
|
"loss/real": 0.07803569734096527, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -301.59222412109375, |
|
"rewards/margins": 307.9425048828125, |
|
"rewards/real": 6.350289821624756, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"grad_norm": 82.73605256797819, |
|
"learning_rate": 4.3874643874643876e-07, |
|
"logits/generated": -2.0518431663513184, |
|
"logits/oppo_generated": -2.995426654815674, |
|
"logits/oppo_real": -2.8803281784057617, |
|
"logits/real": -2.3870060443878174, |
|
"logps/generated": -346.0346984863281, |
|
"logps/oppo_gen": -68.82854461669922, |
|
"logps/oppo_real": -337.844482421875, |
|
"logps/real": -309.0092468261719, |
|
"loss": 0.1528, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -277.2061767578125, |
|
"rewards/margins": 306.04144287109375, |
|
"rewards/real": 28.835275650024414, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 35.13018825672765, |
|
"learning_rate": 4.3839031339031333e-07, |
|
"logits/generated": -2.083667278289795, |
|
"logits/oppo_generated": -2.6126418113708496, |
|
"logits/oppo_real": -3.0222294330596924, |
|
"logits/real": -2.0695085525512695, |
|
"logps/generated": -289.85260009765625, |
|
"logps/oppo_gen": -56.36054992675781, |
|
"logps/oppo_real": -325.3075256347656, |
|
"logps/real": -330.01385498046875, |
|
"loss": 0.2521, |
|
"loss/gen": 0.009135901927947998, |
|
"loss/real": 0.25433236360549927, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -233.49203491210938, |
|
"rewards/margins": 228.78570556640625, |
|
"rewards/real": -4.706315994262695, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"grad_norm": 27.606224618649186, |
|
"learning_rate": 4.38034188034188e-07, |
|
"logits/generated": -2.159607410430908, |
|
"logits/oppo_generated": -3.026592254638672, |
|
"logits/oppo_real": -2.9974026679992676, |
|
"logits/real": -2.492272138595581, |
|
"logps/generated": -330.615478515625, |
|
"logps/oppo_gen": -81.62860107421875, |
|
"logps/oppo_real": -354.01513671875, |
|
"logps/real": -344.7056579589844, |
|
"loss": 0.2107, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.024997137486934662, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -248.98684692382812, |
|
"rewards/margins": 258.29632568359375, |
|
"rewards/real": 9.309473991394043, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 15.104872244654826, |
|
"learning_rate": 4.376780626780627e-07, |
|
"logits/generated": -2.0585901737213135, |
|
"logits/oppo_generated": -2.86299991607666, |
|
"logits/oppo_real": -2.897392749786377, |
|
"logits/real": -2.3068199157714844, |
|
"logps/generated": -285.48638916015625, |
|
"logps/oppo_gen": -55.654396057128906, |
|
"logps/oppo_real": -286.4037170410156, |
|
"logps/real": -298.23529052734375, |
|
"loss": 0.14, |
|
"loss/gen": 0.011369600892066956, |
|
"loss/real": 0.27243572473526, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -229.83200073242188, |
|
"rewards/margins": 218.0004425048828, |
|
"rewards/real": -11.831571578979492, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 46.20299554006212, |
|
"learning_rate": 4.373219373219373e-07, |
|
"logits/generated": -2.1441431045532227, |
|
"logits/oppo_generated": -2.8678367137908936, |
|
"logits/oppo_real": -2.797013759613037, |
|
"logits/real": -2.3208460807800293, |
|
"logps/generated": -526.330810546875, |
|
"logps/oppo_gen": -154.916748046875, |
|
"logps/oppo_real": -268.4582824707031, |
|
"logps/real": -262.050537109375, |
|
"loss": 0.1611, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.10568805783987045, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -371.41400146484375, |
|
"rewards/margins": 377.82177734375, |
|
"rewards/real": 6.407746315002441, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"grad_norm": 42.97747780678634, |
|
"learning_rate": 4.3696581196581193e-07, |
|
"logits/generated": -2.335385799407959, |
|
"logits/oppo_generated": -2.879833221435547, |
|
"logits/oppo_real": -3.0112786293029785, |
|
"logits/real": -2.5094590187072754, |
|
"logps/generated": -324.74005126953125, |
|
"logps/oppo_gen": -96.10844421386719, |
|
"logps/oppo_real": -492.59039306640625, |
|
"logps/real": -506.662109375, |
|
"loss": 0.2189, |
|
"loss/gen": 0.263028621673584, |
|
"loss/real": 0.28753662109375, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -228.631591796875, |
|
"rewards/margins": 214.55987548828125, |
|
"rewards/real": -14.071721076965332, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 96.03388887522988, |
|
"learning_rate": 4.366096866096866e-07, |
|
"logits/generated": -2.3405416011810303, |
|
"logits/oppo_generated": -2.855457305908203, |
|
"logits/oppo_real": -3.161579132080078, |
|
"logits/real": -2.4299869537353516, |
|
"logps/generated": -266.0459289550781, |
|
"logps/oppo_gen": -79.04156494140625, |
|
"logps/oppo_real": -508.73779296875, |
|
"logps/real": -511.5892333984375, |
|
"loss": 0.218, |
|
"loss/gen": 0.43865615129470825, |
|
"loss/real": 0.2312847524881363, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -187.00439453125, |
|
"rewards/margins": 184.15298461914062, |
|
"rewards/real": -2.851390838623047, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"grad_norm": 49.46367136754257, |
|
"learning_rate": 4.362535612535612e-07, |
|
"logits/generated": -2.3023407459259033, |
|
"logits/oppo_generated": -2.8270015716552734, |
|
"logits/oppo_real": -2.9884450435638428, |
|
"logits/real": -2.3669018745422363, |
|
"logps/generated": -310.2086181640625, |
|
"logps/oppo_gen": -79.96229553222656, |
|
"logps/oppo_real": -295.296630859375, |
|
"logps/real": -278.3116455078125, |
|
"loss": 0.119, |
|
"loss/gen": 0.24770958721637726, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -230.246337890625, |
|
"rewards/margins": 247.2313232421875, |
|
"rewards/real": 16.985002517700195, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 54.32108273310534, |
|
"learning_rate": 4.358974358974359e-07, |
|
"logits/generated": -2.0078024864196777, |
|
"logits/oppo_generated": -2.7040886878967285, |
|
"logits/oppo_real": -2.816561698913574, |
|
"logits/real": -2.148149013519287, |
|
"logps/generated": -327.4352111816406, |
|
"logps/oppo_gen": -55.71031188964844, |
|
"logps/oppo_real": -202.95962524414062, |
|
"logps/real": -176.31039428710938, |
|
"loss": 0.2428, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -271.72491455078125, |
|
"rewards/margins": 298.3741455078125, |
|
"rewards/real": 26.649229049682617, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 38.766303588881314, |
|
"learning_rate": 4.355413105413105e-07, |
|
"logits/generated": -1.851919412612915, |
|
"logits/oppo_generated": -2.385345458984375, |
|
"logits/oppo_real": -2.4835422039031982, |
|
"logits/real": -1.8688819408416748, |
|
"logps/generated": -298.13861083984375, |
|
"logps/oppo_gen": -75.58077239990234, |
|
"logps/oppo_real": -339.3034973144531, |
|
"logps/real": -292.58990478515625, |
|
"loss": 0.1566, |
|
"loss/gen": 0.2625175714492798, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -222.55783081054688, |
|
"rewards/margins": 269.27142333984375, |
|
"rewards/real": 46.713592529296875, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"grad_norm": 38.3459545642733, |
|
"learning_rate": 4.3518518518518514e-07, |
|
"logits/generated": -2.3448195457458496, |
|
"logits/oppo_generated": -3.011491060256958, |
|
"logits/oppo_real": -3.0487937927246094, |
|
"logits/real": -2.592437744140625, |
|
"logps/generated": -380.2159118652344, |
|
"logps/oppo_gen": -131.22396850585938, |
|
"logps/oppo_real": -400.33868408203125, |
|
"logps/real": -382.6166687011719, |
|
"loss": 0.2109, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.009188689291477203, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -248.991943359375, |
|
"rewards/margins": 266.71392822265625, |
|
"rewards/real": 17.722003936767578, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 39.05013241714905, |
|
"learning_rate": 4.348290598290598e-07, |
|
"logits/generated": -2.286303997039795, |
|
"logits/oppo_generated": -2.755108118057251, |
|
"logits/oppo_real": -2.8694067001342773, |
|
"logits/real": -2.2778568267822266, |
|
"logps/generated": -286.8591613769531, |
|
"logps/oppo_gen": -61.73572540283203, |
|
"logps/oppo_real": -230.838134765625, |
|
"logps/real": -234.90147399902344, |
|
"loss": 0.1586, |
|
"loss/gen": 0.23340168595314026, |
|
"loss/real": 0.2623848617076874, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -225.12344360351562, |
|
"rewards/margins": 221.0601043701172, |
|
"rewards/real": -4.063333511352539, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"grad_norm": 49.061494814543714, |
|
"learning_rate": 4.3447293447293444e-07, |
|
"logits/generated": -2.1600050926208496, |
|
"logits/oppo_generated": -2.8574419021606445, |
|
"logits/oppo_real": -2.923137903213501, |
|
"logits/real": -2.323585033416748, |
|
"logps/generated": -341.39599609375, |
|
"logps/oppo_gen": -82.77210998535156, |
|
"logps/oppo_real": -252.58892822265625, |
|
"logps/real": -277.39813232421875, |
|
"loss": 0.1963, |
|
"loss/gen": 0.011263325810432434, |
|
"loss/real": 0.3864287734031677, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -258.62384033203125, |
|
"rewards/margins": 233.81466674804688, |
|
"rewards/real": -24.80919075012207, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 49.97145181094652, |
|
"learning_rate": 4.341168091168091e-07, |
|
"logits/generated": -2.0936217308044434, |
|
"logits/oppo_generated": -2.994565010070801, |
|
"logits/oppo_real": -2.8149280548095703, |
|
"logits/real": -2.4390323162078857, |
|
"logps/generated": -267.6043395996094, |
|
"logps/oppo_gen": -48.2861213684082, |
|
"logps/oppo_real": -137.37625122070312, |
|
"logps/real": -160.69287109375, |
|
"loss": 0.301, |
|
"loss/gen": 0.5201160907745361, |
|
"loss/real": 0.3812367916107178, |
|
"rewards/accuracies": 0.875, |
|
"rewards/generated": -219.31820678710938, |
|
"rewards/margins": 196.00160217285156, |
|
"rewards/real": -23.31661605834961, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"grad_norm": 54.01997514664136, |
|
"learning_rate": 4.3376068376068374e-07, |
|
"logits/generated": -2.2345826625823975, |
|
"logits/oppo_generated": -2.816603422164917, |
|
"logits/oppo_real": -2.9343314170837402, |
|
"logits/real": -2.339372158050537, |
|
"logps/generated": -238.27630615234375, |
|
"logps/oppo_gen": -30.44548988342285, |
|
"logps/oppo_real": -174.9966278076172, |
|
"logps/real": -159.52218627929688, |
|
"loss": 0.2248, |
|
"loss/gen": 0.07372879981994629, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -207.830810546875, |
|
"rewards/margins": 223.30526733398438, |
|
"rewards/real": 15.47445297241211, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 26.33121852189253, |
|
"learning_rate": 4.3340455840455836e-07, |
|
"logits/generated": -2.161853075027466, |
|
"logits/oppo_generated": -2.6415185928344727, |
|
"logits/oppo_real": -3.0115818977355957, |
|
"logits/real": -2.075220823287964, |
|
"logps/generated": -362.1558532714844, |
|
"logps/oppo_gen": -93.466064453125, |
|
"logps/oppo_real": -340.529296875, |
|
"logps/real": -308.37493896484375, |
|
"loss": 0.1905, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -268.68975830078125, |
|
"rewards/margins": 300.8441162109375, |
|
"rewards/real": 32.154354095458984, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 37.262840566835145, |
|
"learning_rate": 4.3304843304843304e-07, |
|
"logits/generated": -2.1919541358947754, |
|
"logits/oppo_generated": -2.7984108924865723, |
|
"logits/oppo_real": -2.9754528999328613, |
|
"logits/real": -2.3059372901916504, |
|
"logps/generated": -304.6134338378906, |
|
"logps/oppo_gen": -69.67858123779297, |
|
"logps/oppo_real": -268.7974853515625, |
|
"logps/real": -259.7208251953125, |
|
"loss": 0.1259, |
|
"loss/gen": 0.0012919306755065918, |
|
"loss/real": 0.04866264760494232, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -234.93484497070312, |
|
"rewards/margins": 244.01148986816406, |
|
"rewards/real": 9.076637268066406, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"grad_norm": 21.304152478843108, |
|
"learning_rate": 4.326923076923077e-07, |
|
"logits/generated": -2.002150058746338, |
|
"logits/oppo_generated": -2.7994847297668457, |
|
"logits/oppo_real": -2.687981605529785, |
|
"logits/real": -2.2970423698425293, |
|
"logps/generated": -286.0855712890625, |
|
"logps/oppo_gen": -76.17577362060547, |
|
"logps/oppo_real": -381.5020751953125, |
|
"logps/real": -376.30322265625, |
|
"loss": 0.1755, |
|
"loss/gen": 0.2530289888381958, |
|
"loss/real": 0.24476546049118042, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -209.9097900390625, |
|
"rewards/margins": 215.108642578125, |
|
"rewards/real": 5.198863983154297, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 26.23410279415064, |
|
"learning_rate": 4.323361823361823e-07, |
|
"logits/generated": -2.1952879428863525, |
|
"logits/oppo_generated": -2.8429031372070312, |
|
"logits/oppo_real": -3.0224597454071045, |
|
"logits/real": -2.2873964309692383, |
|
"logps/generated": -302.1619873046875, |
|
"logps/oppo_gen": -78.5534439086914, |
|
"logps/oppo_real": -246.5026397705078, |
|
"logps/real": -227.45849609375, |
|
"loss": 0.1693, |
|
"loss/gen": 0.23556922376155853, |
|
"loss/real": 0.0011737123131752014, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -223.60853576660156, |
|
"rewards/margins": 242.65267944335938, |
|
"rewards/real": 19.044147491455078, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"grad_norm": 30.91998695442349, |
|
"learning_rate": 4.3198005698005696e-07, |
|
"logits/generated": -1.982604742050171, |
|
"logits/oppo_generated": -2.5529236793518066, |
|
"logits/oppo_real": -2.7146146297454834, |
|
"logits/real": -1.9502203464508057, |
|
"logps/generated": -342.57330322265625, |
|
"logps/oppo_gen": -79.70944213867188, |
|
"logps/oppo_real": -106.01055145263672, |
|
"logps/real": -143.1019744873047, |
|
"loss": 0.2063, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.45912817120552063, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -262.8638610839844, |
|
"rewards/margins": 225.77243041992188, |
|
"rewards/real": -37.09141540527344, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 68.66924234145881, |
|
"learning_rate": 4.3162393162393163e-07, |
|
"logits/generated": -1.856884241104126, |
|
"logits/oppo_generated": -2.5894346237182617, |
|
"logits/oppo_real": -2.6849865913391113, |
|
"logits/real": -1.9867148399353027, |
|
"logps/generated": -390.6427001953125, |
|
"logps/oppo_gen": -67.09019470214844, |
|
"logps/oppo_real": -256.4427185058594, |
|
"logps/real": -235.26731872558594, |
|
"loss": 0.1923, |
|
"loss/gen": 0.02218911051750183, |
|
"loss/real": 0.002973802387714386, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -323.552490234375, |
|
"rewards/margins": 344.7278747558594, |
|
"rewards/real": 21.17538070678711, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 25.47653974048355, |
|
"learning_rate": 4.3126780626780625e-07, |
|
"logits/generated": -2.229971408843994, |
|
"logits/oppo_generated": -2.959817886352539, |
|
"logits/oppo_real": -2.9362192153930664, |
|
"logits/real": -2.4872889518737793, |
|
"logps/generated": -313.7774658203125, |
|
"logps/oppo_gen": -82.48292541503906, |
|
"logps/oppo_real": -458.88818359375, |
|
"logps/real": -453.343017578125, |
|
"loss": 0.1714, |
|
"loss/gen": 0.06390117108821869, |
|
"loss/real": 0.09897678345441818, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -231.29452514648438, |
|
"rewards/margins": 236.83969116210938, |
|
"rewards/real": 5.545146942138672, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"grad_norm": 44.17547296984351, |
|
"learning_rate": 4.309116809116809e-07, |
|
"logits/generated": -2.093695878982544, |
|
"logits/oppo_generated": -2.7284858226776123, |
|
"logits/oppo_real": -2.8326492309570312, |
|
"logits/real": -2.21309757232666, |
|
"logps/generated": -323.655029296875, |
|
"logps/oppo_gen": -60.89936828613281, |
|
"logps/oppo_real": -245.58233642578125, |
|
"logps/real": -238.11309814453125, |
|
"loss": 0.173, |
|
"loss/gen": 0.0006021559238433838, |
|
"loss/real": 0.018122456967830658, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -262.75567626953125, |
|
"rewards/margins": 270.2248840332031, |
|
"rewards/real": 7.469233512878418, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 33.680569135700296, |
|
"learning_rate": 4.3055555555555555e-07, |
|
"logits/generated": -2.0983548164367676, |
|
"logits/oppo_generated": -2.884782075881958, |
|
"logits/oppo_real": -3.007986545562744, |
|
"logits/real": -2.3658394813537598, |
|
"logps/generated": -301.1651611328125, |
|
"logps/oppo_gen": -64.29571533203125, |
|
"logps/oppo_real": -445.2386169433594, |
|
"logps/real": -415.5571594238281, |
|
"loss": 0.1804, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -236.8694610595703, |
|
"rewards/margins": 266.5509338378906, |
|
"rewards/real": 29.681480407714844, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"grad_norm": 20.284545818732685, |
|
"learning_rate": 4.3019943019943017e-07, |
|
"logits/generated": -2.0810956954956055, |
|
"logits/oppo_generated": -2.8430304527282715, |
|
"logits/oppo_real": -2.873483657836914, |
|
"logits/real": -2.3326306343078613, |
|
"logps/generated": -320.660400390625, |
|
"logps/oppo_gen": -68.79239654541016, |
|
"logps/oppo_real": -391.89910888671875, |
|
"logps/real": -372.5426330566406, |
|
"loss": 0.1924, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -251.86802673339844, |
|
"rewards/margins": 271.22454833984375, |
|
"rewards/real": 19.35650634765625, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 31.462047177474897, |
|
"learning_rate": 4.2984330484330485e-07, |
|
"logits/generated": -2.0807740688323975, |
|
"logits/oppo_generated": -2.8508265018463135, |
|
"logits/oppo_real": -2.9677348136901855, |
|
"logits/real": -2.363823890686035, |
|
"logps/generated": -337.72705078125, |
|
"logps/oppo_gen": -88.43344116210938, |
|
"logps/oppo_real": -438.55322265625, |
|
"logps/real": -411.73760986328125, |
|
"loss": 0.2283, |
|
"loss/gen": 0.009022071957588196, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -249.2935791015625, |
|
"rewards/margins": 276.10919189453125, |
|
"rewards/real": 26.81560516357422, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"grad_norm": 30.65675470245862, |
|
"learning_rate": 4.294871794871794e-07, |
|
"logits/generated": -2.2450156211853027, |
|
"logits/oppo_generated": -2.816070079803467, |
|
"logits/oppo_real": -3.012850761413574, |
|
"logits/real": -2.3096275329589844, |
|
"logps/generated": -328.40545654296875, |
|
"logps/oppo_gen": -55.2912483215332, |
|
"logps/oppo_real": -255.20977783203125, |
|
"logps/real": -242.8808135986328, |
|
"loss": 0.1844, |
|
"loss/gen": 0.014506042003631592, |
|
"loss/real": 0.023922577500343323, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -273.1142272949219, |
|
"rewards/margins": 285.44317626953125, |
|
"rewards/real": 12.328951835632324, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 60.6619853354945, |
|
"learning_rate": 4.291310541310541e-07, |
|
"logits/generated": -2.2790613174438477, |
|
"logits/oppo_generated": -2.701869487762451, |
|
"logits/oppo_real": -2.963564872741699, |
|
"logits/real": -2.223146915435791, |
|
"logps/generated": -349.9393310546875, |
|
"logps/oppo_gen": -83.03327941894531, |
|
"logps/oppo_real": -312.4057312011719, |
|
"logps/real": -290.92095947265625, |
|
"loss": 0.1719, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.0, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -266.9060363769531, |
|
"rewards/margins": 288.39080810546875, |
|
"rewards/real": 21.484760284423828, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 47.305535835704525, |
|
"learning_rate": 4.2877492877492877e-07, |
|
"logits/generated": -2.2459330558776855, |
|
"logits/oppo_generated": -2.8546152114868164, |
|
"logits/oppo_real": -3.036848545074463, |
|
"logits/real": -2.419419050216675, |
|
"logps/generated": -338.7462158203125, |
|
"logps/oppo_gen": -75.19477844238281, |
|
"logps/oppo_real": -314.191162109375, |
|
"logps/real": -302.6313171386719, |
|
"loss": 0.1123, |
|
"loss/gen": 0.0, |
|
"loss/real": 0.11184393614530563, |
|
"rewards/accuracies": 1.0, |
|
"rewards/generated": -263.5514221191406, |
|
"rewards/margins": 275.1112365722656, |
|
"rewards/real": 11.559805870056152, |
|
"step": 239 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1434, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 6, |
|
"save_steps": 500, |
|
"total_flos": 0.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|