{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.0, "eval_steps": 100.0, "global_step": 239, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.130502223968506, "logits/oppo_generated": -3.1088104248046875, "logits/oppo_real": -3.130502223968506, "logits/real": -3.1088104248046875, "logps/generated": -99.40917205810547, "logps/oppo_gen": -99.40917205810547, "logps/oppo_real": -459.3097229003906, "logps/real": -459.3097229003906, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 1 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -3.0933988094329834, "logits/oppo_generated": -2.919645309448242, "logits/oppo_real": -3.0933988094329834, "logits/real": -2.919645309448242, "logps/generated": -103.65153503417969, "logps/oppo_gen": -103.65153503417969, "logps/oppo_real": -392.1358642578125, "logps/real": -392.1358642578125, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 2 }, { "epoch": 0.01, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.6572537422180176, "logits/oppo_generated": -2.8074941635131836, "logits/oppo_real": -2.6572537422180176, "logits/real": -2.8074941635131836, "logps/generated": -72.88986206054688, "logps/oppo_gen": -72.88986206054688, "logps/oppo_real": -291.916748046875, "logps/real": -291.916748046875, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 3 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.8966193199157715, "logits/oppo_generated": -2.768460273742676, "logits/oppo_real": -2.8966193199157715, "logits/real": -2.768460273742676, "logps/generated": -64.05287170410156, "logps/oppo_gen": -64.05287170410156, "logps/oppo_real": -376.8367919921875, "logps/real": -376.8367919921875, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 4 }, { "epoch": 0.02, "grad_norm": 0.0, "learning_rate": 0.0, "logits/generated": -2.889317512512207, "logits/oppo_generated": -2.708950996398926, "logits/oppo_real": -2.889317512512207, "logits/real": -2.708950996398926, "logps/generated": -48.29164123535156, "logps/oppo_gen": -48.29164123535156, "logps/oppo_real": -173.0751953125, "logps/real": -173.0751953125, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 5 }, { "epoch": 0.03, "grad_norm": 21.44563623958737, "learning_rate": 1.6666666666666667e-08, "logits/generated": -2.957958698272705, "logits/oppo_generated": -2.749436378479004, "logits/oppo_real": -2.957958698272705, "logits/real": -2.749436378479004, "logps/generated": -48.84138488769531, "logps/oppo_gen": -48.84138488769531, "logps/oppo_real": -139.2998046875, "logps/real": -139.2998046875, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 6 }, { "epoch": 0.03, "grad_norm": 24.181090932615223, "learning_rate": 3.3333333333333334e-08, "logits/generated": -3.1195316314697266, "logits/oppo_generated": -2.9545342922210693, "logits/oppo_real": -3.1195316314697266, "logits/real": -2.9545342922210693, "logps/generated": -163.2059783935547, "logps/oppo_gen": -163.2059783935547, "logps/oppo_real": -432.88226318359375, "logps/real": -432.88226318359375, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 7 }, { "epoch": 0.03, "grad_norm": 23.92123585650089, "learning_rate": 5e-08, "logits/generated": -2.910332441329956, "logits/oppo_generated": -2.9416637420654297, "logits/oppo_real": -2.910332441329956, "logits/real": -2.9416637420654297, "logps/generated": -69.29386901855469, "logps/oppo_gen": -69.29386901855469, "logps/oppo_real": -311.59619140625, "logps/real": -311.59619140625, "loss": 2.0, "loss/gen": 2.0, "loss/real": 0.0, "rewards/accuracies": 0.0, "rewards/generated": 0.0, "rewards/margins": 0.0, "rewards/real": 0.0, "step": 8 }, { "epoch": 0.04, "grad_norm": 18.72729908334438, "learning_rate": 6.666666666666667e-08, "logits/generated": -2.409820318222046, "logits/oppo_generated": -2.294548273086548, "logits/oppo_real": -2.409976005554199, "logits/real": -2.29451322555542, "logps/generated": -82.21556091308594, "logps/oppo_gen": -82.20011138916016, "logps/oppo_real": -381.1852111816406, "logps/real": -381.1634521484375, "loss": 2.0, "loss/gen": 1.9998455047607422, "loss/real": 7.733702659606934e-06, "rewards/accuracies": 0.875, "rewards/generated": -0.015454292297363281, "rewards/margins": 0.03723621368408203, "rewards/real": 0.02178192138671875, "step": 9 }, { "epoch": 0.04, "grad_norm": 17.202277028374397, "learning_rate": 8.333333333333333e-08, "logits/generated": -2.962700366973877, "logits/oppo_generated": -2.9239017963409424, "logits/oppo_real": -2.963313579559326, "logits/real": -2.923351526260376, "logps/generated": -93.16413879394531, "logps/oppo_gen": -93.09856414794922, "logps/oppo_real": -233.10401916503906, "logps/real": -233.077880859375, "loss": 1.9995, "loss/gen": 1.9993443489074707, "loss/real": 5.08427619934082e-05, "rewards/accuracies": 0.875, "rewards/generated": -0.06557583808898926, "rewards/margins": 0.09174036979675293, "rewards/real": 0.026164531707763672, "step": 10 }, { "epoch": 0.05, "grad_norm": 17.401866312895148, "learning_rate": 1e-07, "logits/generated": -2.8563976287841797, "logits/oppo_generated": -2.837850570678711, "logits/oppo_real": -2.857771396636963, "logits/real": -2.836169958114624, "logps/generated": -59.6667594909668, "logps/oppo_gen": -59.46293640136719, "logps/oppo_real": -142.69805908203125, "logps/real": -142.69808959960938, "loss": 1.998, "loss/gen": 1.9979617595672607, "loss/real": 0.0006099119782447815, "rewards/accuracies": 0.875, "rewards/generated": -0.20382428169250488, "rewards/margins": 0.20379090309143066, "rewards/real": -3.337860107421875e-05, "step": 11 }, { "epoch": 0.05, "grad_norm": 17.22730830301668, "learning_rate": 1.1666666666666667e-07, "logits/generated": -2.875734329223633, "logits/oppo_generated": -2.7672762870788574, "logits/oppo_real": -2.8780808448791504, "logits/real": -2.7644264698028564, "logps/generated": -70.91142272949219, "logps/oppo_gen": -70.58644104003906, "logps/oppo_real": -343.4704284667969, "logps/real": -343.3141174316406, "loss": 1.9968, "loss/gen": 1.9967502355575562, "loss/real": 0.00020247697830200195, "rewards/accuracies": 1.0, "rewards/generated": -0.32497692108154297, "rewards/margins": 0.48128724098205566, "rewards/real": 0.1563103199005127, "step": 12 }, { "epoch": 0.05, "grad_norm": 15.819281409300507, "learning_rate": 1.3333333333333334e-07, "logits/generated": -2.815218925476074, "logits/oppo_generated": -2.8374581336975098, "logits/oppo_real": -2.822021961212158, "logits/real": -2.830238103866577, "logps/generated": -107.48092651367188, "logps/oppo_gen": -106.73956298828125, "logps/oppo_real": -280.41741943359375, "logps/real": -280.51971435546875, "loss": 1.9918, "loss/gen": 1.9925864934921265, "loss/real": 0.0017677471041679382, "rewards/accuracies": 1.0, "rewards/generated": -0.7413442134857178, "rewards/margins": 0.6390435695648193, "rewards/real": -0.10230064392089844, "step": 13 }, { "epoch": 0.06, "grad_norm": 16.905273409293585, "learning_rate": 1.5e-07, "logits/generated": -2.7615389823913574, "logits/oppo_generated": -2.8255615234375, "logits/oppo_real": -2.771684169769287, "logits/real": -2.815335273742676, "logps/generated": -87.6278076171875, "logps/oppo_gen": -85.86231994628906, "logps/oppo_real": -289.01318359375, "logps/real": -288.6315612792969, "loss": 1.9878, "loss/gen": 1.9823451042175293, "loss/real": 0.001967109739780426, "rewards/accuracies": 1.0, "rewards/generated": -1.7654941082000732, "rewards/margins": 2.147136688232422, "rewards/real": 0.38164258003234863, "step": 14 }, { "epoch": 0.06, "grad_norm": 16.47449070515203, "learning_rate": 1.6666666666666665e-07, "logits/generated": -3.1344149112701416, "logits/oppo_generated": -2.7394165992736816, "logits/oppo_real": -3.1553921699523926, "logits/real": -2.717817544937134, "logps/generated": -77.48875427246094, "logps/oppo_gen": -74.47514343261719, "logps/oppo_real": -366.370361328125, "logps/real": -366.13739013671875, "loss": 1.9771, "loss/gen": 1.9698638916015625, "loss/real": 0.002904340624809265, "rewards/accuracies": 1.0, "rewards/generated": -3.013608455657959, "rewards/margins": 3.2465546131134033, "rewards/real": 0.23294615745544434, "step": 15 }, { "epoch": 0.07, "grad_norm": 16.775265896504816, "learning_rate": 1.833333333333333e-07, "logits/generated": -2.108006000518799, "logits/oppo_generated": -2.1468427181243896, "logits/oppo_real": -2.142064094543457, "logits/real": -2.117852210998535, "logps/generated": -81.89228820800781, "logps/oppo_gen": -78.08332824707031, "logps/oppo_real": -437.152587890625, "logps/real": -437.1942443847656, "loss": 1.9669, "loss/gen": 1.9619104862213135, "loss/real": 0.004711121320724487, "rewards/accuracies": 1.0, "rewards/generated": -3.8089609146118164, "rewards/margins": 3.76729679107666, "rewards/real": -0.04166412353515625, "step": 16 }, { "epoch": 0.07, "grad_norm": 15.954718888453357, "learning_rate": 2e-07, "logits/generated": -2.9154043197631836, "logits/oppo_generated": -2.902646064758301, "logits/oppo_real": -2.953411817550659, "logits/real": -2.8594038486480713, "logps/generated": -77.6299819946289, "logps/oppo_gen": -72.53976440429688, "logps/oppo_real": -310.7004089355469, "logps/real": -310.427734375, "loss": 1.961, "loss/gen": 1.9490978717803955, "loss/real": 0.0060840025544166565, "rewards/accuracies": 1.0, "rewards/generated": -5.090216636657715, "rewards/margins": 5.362889289855957, "rewards/real": 0.2726726531982422, "step": 17 }, { "epoch": 0.08, "grad_norm": 17.536179990025712, "learning_rate": 2.1666666666666667e-07, "logits/generated": -2.9157583713531494, "logits/oppo_generated": -2.947140693664551, "logits/oppo_real": -2.9634807109832764, "logits/real": -2.8917417526245117, "logps/generated": -83.36225891113281, "logps/oppo_gen": -74.80116271972656, "logps/oppo_real": -309.46124267578125, "logps/real": -309.94171142578125, "loss": 1.9385, "loss/gen": 1.9143891334533691, "loss/real": 0.010849758982658386, "rewards/accuracies": 1.0, "rewards/generated": -8.56109619140625, "rewards/margins": 8.080650329589844, "rewards/real": -0.48044562339782715, "step": 18 }, { "epoch": 0.08, "grad_norm": 18.055822491099047, "learning_rate": 2.3333333333333333e-07, "logits/generated": -2.398920774459839, "logits/oppo_generated": -2.6668543815612793, "logits/oppo_real": -2.47564697265625, "logits/real": -2.5944724082946777, "logps/generated": -77.91998291015625, "logps/oppo_gen": -67.190673828125, "logps/oppo_real": -285.60797119140625, "logps/real": -287.39215087890625, "loss": 1.9138, "loss/gen": 1.892707109451294, "loss/real": 0.023035116493701935, "rewards/accuracies": 1.0, "rewards/generated": -10.729303359985352, "rewards/margins": 8.945160865783691, "rewards/real": -1.7841424942016602, "step": 19 }, { "epoch": 0.08, "grad_norm": 16.8409722750193, "learning_rate": 2.5e-07, "logits/generated": -3.0429744720458984, "logits/oppo_generated": -2.7376956939697266, "logits/oppo_real": -3.1153059005737305, "logits/real": -2.66239595413208, "logps/generated": -108.49114990234375, "logps/oppo_gen": -93.65745544433594, "logps/oppo_real": -173.968994140625, "logps/real": -176.7364044189453, "loss": 1.9, "loss/gen": 1.8516631126403809, "loss/real": 0.030972033739089966, "rewards/accuracies": 1.0, "rewards/generated": -14.833693504333496, "rewards/margins": 12.066278457641602, "rewards/real": -2.7674155235290527, "step": 20 }, { "epoch": 0.09, "grad_norm": 17.0265708165071, "learning_rate": 2.6666666666666667e-07, "logits/generated": -2.807745933532715, "logits/oppo_generated": -2.6699156761169434, "logits/oppo_real": -2.8930060863494873, "logits/real": -2.5801193714141846, "logps/generated": -62.11570739746094, "logps/oppo_gen": -50.189754486083984, "logps/oppo_real": -197.0562286376953, "logps/real": -198.63250732421875, "loss": 1.8804, "loss/gen": 1.8807404041290283, "loss/real": 0.03226040303707123, "rewards/accuracies": 0.875, "rewards/generated": -11.925955772399902, "rewards/margins": 10.349677085876465, "rewards/real": -1.5762791633605957, "step": 21 }, { "epoch": 0.09, "grad_norm": 15.942840997675052, "learning_rate": 2.833333333333333e-07, "logits/generated": -2.883525848388672, "logits/oppo_generated": -2.8113152980804443, "logits/oppo_real": -2.997610330581665, "logits/real": -2.701076030731201, "logps/generated": -77.23695373535156, "logps/oppo_gen": -59.91856384277344, "logps/oppo_real": -175.6089324951172, "logps/real": -180.22738647460938, "loss": 1.8703, "loss/gen": 1.8268163204193115, "loss/real": 0.05111686885356903, "rewards/accuracies": 1.0, "rewards/generated": -17.318382263183594, "rewards/margins": 12.699928283691406, "rewards/real": -4.618453025817871, "step": 22 }, { "epoch": 0.1, "grad_norm": 18.193819711198465, "learning_rate": 3e-07, "logits/generated": -2.71798038482666, "logits/oppo_generated": -2.712057113647461, "logits/oppo_real": -2.83805513381958, "logits/real": -2.592156410217285, "logps/generated": -108.12179565429688, "logps/oppo_gen": -84.5518798828125, "logps/oppo_real": -331.96221923828125, "logps/real": -337.7485656738281, "loss": 1.8307, "loss/gen": 1.76430082321167, "loss/real": 0.058562956750392914, "rewards/accuracies": 1.0, "rewards/generated": -23.569915771484375, "rewards/margins": 17.783584594726562, "rewards/real": -5.786332130432129, "step": 23 }, { "epoch": 0.1, "grad_norm": 16.996427272784786, "learning_rate": 3.166666666666666e-07, "logits/generated": -2.2170791625976562, "logits/oppo_generated": -2.4313888549804688, "logits/oppo_real": -2.3368191719055176, "logits/real": -2.3218801021575928, "logps/generated": -98.60990142822266, "logps/oppo_gen": -70.7446060180664, "logps/oppo_real": -186.56976318359375, "logps/real": -192.2266082763672, "loss": 1.8198, "loss/gen": 1.7213470935821533, "loss/real": 0.05736871063709259, "rewards/accuracies": 1.0, "rewards/generated": -27.865299224853516, "rewards/margins": 22.208459854125977, "rewards/real": -5.6568403244018555, "step": 24 }, { "epoch": 0.1, "grad_norm": 16.67077026711208, "learning_rate": 3.333333333333333e-07, "logits/generated": -2.77504825592041, "logits/oppo_generated": -2.8222999572753906, "logits/oppo_real": -2.956730842590332, "logits/real": -2.662292957305908, "logps/generated": -79.92071533203125, "logps/oppo_gen": -55.461936950683594, "logps/oppo_real": -125.98847198486328, "logps/real": -132.84556579589844, "loss": 1.7945, "loss/gen": 1.7554123401641846, "loss/real": 0.07477347552776337, "rewards/accuracies": 0.875, "rewards/generated": -24.458776473999023, "rewards/margins": 17.601680755615234, "rewards/real": -6.857094764709473, "step": 25 }, { "epoch": 0.11, "grad_norm": 16.22325115917978, "learning_rate": 3.5e-07, "logits/generated": -2.4703292846679688, "logits/oppo_generated": -2.9076757431030273, "logits/oppo_real": -2.661245822906494, "logits/real": -2.7052745819091797, "logps/generated": -103.34319305419922, "logps/oppo_gen": -71.46342468261719, "logps/oppo_real": -293.69677734375, "logps/real": -296.63507080078125, "loss": 1.7719, "loss/gen": 1.6812022924423218, "loss/real": 0.05883955955505371, "rewards/accuracies": 1.0, "rewards/generated": -31.879772186279297, "rewards/margins": 28.941482543945312, "rewards/real": -2.938288688659668, "step": 26 }, { "epoch": 0.11, "grad_norm": 15.655657911240558, "learning_rate": 3.666666666666666e-07, "logits/generated": -2.6560888290405273, "logits/oppo_generated": -3.018123149871826, "logits/oppo_real": -2.837935447692871, "logits/real": -2.8067123889923096, "logps/generated": -81.89646911621094, "logps/oppo_gen": -51.06623458862305, "logps/oppo_real": -151.72972106933594, "logps/real": -171.45449829101562, "loss": 1.7534, "loss/gen": 1.6916977167129517, "loss/real": 0.19724780321121216, "rewards/accuracies": 0.875, "rewards/generated": -30.83022689819336, "rewards/margins": 11.105447769165039, "rewards/real": -19.724781036376953, "step": 27 }, { "epoch": 0.12, "grad_norm": 15.655657911240558, "learning_rate": 3.666666666666666e-07, "logits/generated": -2.387822151184082, "logits/oppo_generated": -2.7700376510620117, "logits/oppo_real": -2.6328747272491455, "logits/real": -2.5694613456726074, "logps/generated": -121.05097961425781, "logps/oppo_gen": -72.09120178222656, "logps/oppo_real": -411.427978515625, "logps/real": -412.50714111328125, "loss": 1.6877, "loss/gen": 1.5104023218154907, "loss/real": 0.0984681025147438, "rewards/accuracies": 1.0, "rewards/generated": -48.95977020263672, "rewards/margins": 47.880558013916016, "rewards/real": -1.0792131423950195, "step": 28 }, { "epoch": 0.12, "grad_norm": 16.840291816557006, "learning_rate": 3.8333333333333335e-07, "logits/generated": -2.6876704692840576, "logits/oppo_generated": -2.91198468208313, "logits/oppo_real": -2.9211230278015137, "logits/real": -2.680572509765625, "logps/generated": -127.4861831665039, "logps/oppo_gen": -82.21741485595703, "logps/oppo_real": -301.3589172363281, "logps/real": -309.1620178222656, "loss": 1.7013, "loss/gen": 1.5473122596740723, "loss/real": 0.10803677141666412, "rewards/accuracies": 1.0, "rewards/generated": -45.268768310546875, "rewards/margins": 37.46567153930664, "rewards/real": -7.8031005859375, "step": 29 }, { "epoch": 0.13, "grad_norm": 17.95870813154182, "learning_rate": 4e-07, "logits/generated": -2.7156505584716797, "logits/oppo_generated": -2.4022648334503174, "logits/oppo_real": -2.97650146484375, "logits/real": -2.1997687816619873, "logps/generated": -136.54647827148438, "logps/oppo_gen": -99.30915832519531, "logps/oppo_real": -226.3162841796875, "logps/real": -240.60678100585938, "loss": 1.6691, "loss/gen": 1.627626895904541, "loss/real": 0.14702296257019043, "rewards/accuracies": 1.0, "rewards/generated": -37.23731231689453, "rewards/margins": 22.946819305419922, "rewards/real": -14.29049301147461, "step": 30 }, { "epoch": 0.13, "grad_norm": 18.000802763945956, "learning_rate": 4.1666666666666667e-07, "logits/generated": -2.7124738693237305, "logits/oppo_generated": -2.854034900665283, "logits/oppo_real": -2.9424033164978027, "logits/real": -2.666820526123047, "logps/generated": -94.56930541992188, "logps/oppo_gen": -54.3837890625, "logps/oppo_real": -252.91123962402344, "logps/real": -263.7140197753906, "loss": 1.635, "loss/gen": 1.5981448888778687, "loss/real": 0.11872847378253937, "rewards/accuracies": 1.0, "rewards/generated": -40.185516357421875, "rewards/margins": 29.382728576660156, "rewards/real": -10.802785873413086, "step": 31 }, { "epoch": 0.13, "grad_norm": 17.376117039182688, "learning_rate": 4.3333333333333335e-07, "logits/generated": -2.646808385848999, "logits/oppo_generated": -2.9263906478881836, "logits/oppo_real": -2.9535346031188965, "logits/real": -2.6604020595550537, "logps/generated": -133.86170959472656, "logps/oppo_gen": -78.93435668945312, "logps/oppo_real": -298.2490234375, "logps/real": -312.76690673828125, "loss": 1.629, "loss/gen": 1.4507265090942383, "loss/real": 0.1622442901134491, "rewards/accuracies": 1.0, "rewards/generated": -54.927345275878906, "rewards/margins": 40.409461975097656, "rewards/real": -14.517885208129883, "step": 32 }, { "epoch": 0.14, "grad_norm": 19.314468826625482, "learning_rate": 4.5e-07, "logits/generated": -2.7972543239593506, "logits/oppo_generated": -2.9521539211273193, "logits/oppo_real": -3.0699048042297363, "logits/real": -2.6584794521331787, "logps/generated": -189.65911865234375, "logps/oppo_gen": -136.80690002441406, "logps/oppo_real": -344.64990234375, "logps/real": -362.10601806640625, "loss": 1.5829, "loss/gen": 1.4714778661727905, "loss/real": 0.20715071260929108, "rewards/accuracies": 1.0, "rewards/generated": -52.85221862792969, "rewards/margins": 35.396095275878906, "rewards/real": -17.45612144470215, "step": 33 }, { "epoch": 0.14, "grad_norm": 19.659759256745826, "learning_rate": 4.6666666666666666e-07, "logits/generated": -2.6684279441833496, "logits/oppo_generated": -2.8447458744049072, "logits/oppo_real": -2.998192548751831, "logits/real": -2.5552549362182617, "logps/generated": -146.35748291015625, "logps/oppo_gen": -79.24800109863281, "logps/oppo_real": -401.9757385253906, "logps/real": -426.55157470703125, "loss": 1.5299, "loss/gen": 1.3289053440093994, "loss/real": 0.2510119676589966, "rewards/accuracies": 1.0, "rewards/generated": -67.10946655273438, "rewards/margins": 42.533626556396484, "rewards/real": -24.575847625732422, "step": 34 }, { "epoch": 0.15, "grad_norm": 21.872934472794725, "learning_rate": 4.833333333333333e-07, "logits/generated": -2.586422920227051, "logits/oppo_generated": -2.942030906677246, "logits/oppo_real": -2.9536867141723633, "logits/real": -2.602694511413574, "logps/generated": -140.24705505371094, "logps/oppo_gen": -62.21235656738281, "logps/oppo_real": -296.8402404785156, "logps/real": -320.66766357421875, "loss": 1.4528, "loss/gen": 1.2196528911590576, "loss/real": 0.259676456451416, "rewards/accuracies": 1.0, "rewards/generated": -78.03470611572266, "rewards/margins": 54.207305908203125, "rewards/real": -23.8273983001709, "step": 35 }, { "epoch": 0.15, "grad_norm": 22.019553078421858, "learning_rate": 5e-07, "logits/generated": -2.2861862182617188, "logits/oppo_generated": -2.792217493057251, "logits/oppo_real": -2.680948257446289, "logits/real": -2.4454588890075684, "logps/generated": -128.45945739746094, "logps/oppo_gen": -49.044715881347656, "logps/oppo_real": -183.3726348876953, "logps/real": -205.66845703125, "loss": 1.4374, "loss/gen": 1.2058525085449219, "loss/real": 0.26081162691116333, "rewards/accuracies": 0.75, "rewards/generated": -79.41474914550781, "rewards/margins": 57.11891555786133, "rewards/real": -22.29583168029785, "step": 36 }, { "epoch": 0.15, "grad_norm": 21.563572776755652, "learning_rate": 4.996438746438746e-07, "logits/generated": -2.352074146270752, "logits/oppo_generated": -2.5968940258026123, "logits/oppo_real": -2.84472393989563, "logits/real": -2.1995410919189453, "logps/generated": -192.30780029296875, "logps/oppo_gen": -96.46727752685547, "logps/oppo_real": -441.2087097167969, "logps/real": -454.900390625, "loss": 1.3751, "loss/gen": 1.1064567565917969, "loss/real": 0.17831739783287048, "rewards/accuracies": 1.0, "rewards/generated": -95.84051513671875, "rewards/margins": 82.14884948730469, "rewards/real": -13.691666603088379, "step": 37 }, { "epoch": 0.16, "grad_norm": 27.510149937966077, "learning_rate": 4.992877492877492e-07, "logits/generated": -2.568110466003418, "logits/oppo_generated": -3.097993850708008, "logits/oppo_real": -3.161780834197998, "logits/real": -2.603790283203125, "logps/generated": -209.0296630859375, "logps/oppo_gen": -86.33152770996094, "logps/oppo_real": -374.5130615234375, "logps/real": -400.4665832519531, "loss": 1.2577, "loss/gen": 0.7730186581611633, "loss/real": 0.2766711115837097, "rewards/accuracies": 0.875, "rewards/generated": -122.69813537597656, "rewards/margins": 96.74461364746094, "rewards/real": -25.95351791381836, "step": 38 }, { "epoch": 0.16, "grad_norm": 27.09461729076114, "learning_rate": 4.98931623931624e-07, "logits/generated": -2.2019739151000977, "logits/oppo_generated": -2.648486614227295, "logits/oppo_real": -2.7488012313842773, "logits/real": -2.1398563385009766, "logps/generated": -196.87429809570312, "logps/oppo_gen": -78.30477142333984, "logps/oppo_real": -363.86407470703125, "logps/real": -402.26739501953125, "loss": 1.2019, "loss/gen": 0.8143048882484436, "loss/real": 0.3951404094696045, "rewards/accuracies": 1.0, "rewards/generated": -118.56951904296875, "rewards/margins": 80.16621398925781, "rewards/real": -38.403289794921875, "step": 39 }, { "epoch": 0.17, "grad_norm": 27.1369816207399, "learning_rate": 4.985754985754986e-07, "logits/generated": -2.157773017883301, "logits/oppo_generated": -2.864193916320801, "logits/oppo_real": -2.7761850357055664, "logits/real": -2.3227579593658447, "logps/generated": -172.39085388183594, "logps/oppo_gen": -60.6450309753418, "logps/oppo_real": -320.1565856933594, "logps/real": -337.9253845214844, "loss": 1.1863, "loss/gen": 0.9242483377456665, "loss/real": 0.19020405411720276, "rewards/accuracies": 0.875, "rewards/generated": -111.7458267211914, "rewards/margins": 93.97701263427734, "rewards/real": -17.768808364868164, "step": 40 }, { "epoch": 0.17, "grad_norm": 24.71832653121781, "learning_rate": 4.982193732193732e-07, "logits/generated": -2.3361663818359375, "logits/oppo_generated": -2.812058210372925, "logits/oppo_real": -2.982236862182617, "logits/real": -2.289778470993042, "logps/generated": -202.0223388671875, "logps/oppo_gen": -90.06674194335938, "logps/oppo_real": -176.9713592529297, "logps/real": -221.30557250976562, "loss": 1.1883, "loss/gen": 0.8804440498352051, "loss/real": 0.46414560079574585, "rewards/accuracies": 1.0, "rewards/generated": -111.95559692382812, "rewards/margins": 67.62137603759766, "rewards/real": -44.3342170715332, "step": 41 }, { "epoch": 0.18, "grad_norm": 25.356203831684, "learning_rate": 4.978632478632478e-07, "logits/generated": -2.2286205291748047, "logits/oppo_generated": -2.9253015518188477, "logits/oppo_real": -2.9079301357269287, "logits/real": -2.3039026260375977, "logps/generated": -171.630859375, "logps/oppo_gen": -54.79414367675781, "logps/oppo_real": -186.92176818847656, "logps/real": -248.6990966796875, "loss": 1.1143, "loss/gen": 0.8316328525543213, "loss/real": 0.6177734136581421, "rewards/accuracies": 1.0, "rewards/generated": -116.83671569824219, "rewards/margins": 55.05937194824219, "rewards/real": -61.77734375, "step": 42 }, { "epoch": 0.18, "grad_norm": 27.712655447875367, "learning_rate": 4.975071225071225e-07, "logits/generated": -2.067673683166504, "logits/oppo_generated": -2.9949498176574707, "logits/oppo_real": -2.9107003211975098, "logits/real": -2.3845181465148926, "logps/generated": -286.51165771484375, "logps/oppo_gen": -79.9820785522461, "logps/oppo_real": -404.1100158691406, "logps/real": -439.1029968261719, "loss": 1.0441, "loss/gen": 0.4001755118370056, "loss/real": 0.3547167181968689, "rewards/accuracies": 1.0, "rewards/generated": -206.529541015625, "rewards/margins": 171.53656005859375, "rewards/real": -34.99298858642578, "step": 43 }, { "epoch": 0.18, "grad_norm": 26.747401031017247, "learning_rate": 4.971509971509972e-07, "logits/generated": -1.7710440158843994, "logits/oppo_generated": -2.4440221786499023, "logits/oppo_real": -2.3998050689697266, "logits/real": -1.8367252349853516, "logps/generated": -312.6555480957031, "logps/oppo_gen": -93.22187805175781, "logps/oppo_real": -290.8685302734375, "logps/real": -330.1634521484375, "loss": 0.9874, "loss/gen": 0.40501296520233154, "loss/real": 0.39968231320381165, "rewards/accuracies": 1.0, "rewards/generated": -219.43365478515625, "rewards/margins": 180.13876342773438, "rewards/real": -39.29491424560547, "step": 44 }, { "epoch": 0.19, "grad_norm": 29.940363291114327, "learning_rate": 4.967948717948718e-07, "logits/generated": -2.0964088439941406, "logits/oppo_generated": -2.9232547283172607, "logits/oppo_real": -2.7114880084991455, "logits/real": -2.3123269081115723, "logps/generated": -256.84454345703125, "logps/oppo_gen": -64.50846862792969, "logps/oppo_real": -239.8323974609375, "logps/real": -305.34417724609375, "loss": 0.9685, "loss/gen": 0.41360723972320557, "loss/real": 0.6562252640724182, "rewards/accuracies": 1.0, "rewards/generated": -192.33609008789062, "rewards/margins": 126.8243179321289, "rewards/real": -65.51176452636719, "step": 45 }, { "epoch": 0.19, "grad_norm": 26.65060984072996, "learning_rate": 4.964387464387464e-07, "logits/generated": -2.263786792755127, "logits/oppo_generated": -2.741456985473633, "logits/oppo_real": -2.9938759803771973, "logits/real": -2.225804328918457, "logps/generated": -184.13873291015625, "logps/oppo_gen": -58.174400329589844, "logps/oppo_real": -258.21685791015625, "logps/real": -307.77520751953125, "loss": 0.9712, "loss/gen": 0.7734701633453369, "loss/real": 0.5007840394973755, "rewards/accuracies": 0.875, "rewards/generated": -125.96434783935547, "rewards/margins": 76.40599060058594, "rewards/real": -49.558353424072266, "step": 46 }, { "epoch": 0.2, "grad_norm": 28.4577902373904, "learning_rate": 4.96082621082621e-07, "logits/generated": -2.171962022781372, "logits/oppo_generated": -2.814079761505127, "logits/oppo_real": -2.964923620223999, "logits/real": -2.236274480819702, "logps/generated": -279.45623779296875, "logps/oppo_gen": -78.5189208984375, "logps/oppo_real": -288.56396484375, "logps/real": -338.4586181640625, "loss": 0.8781, "loss/gen": 0.38681352138519287, "loss/real": 0.5231560468673706, "rewards/accuracies": 1.0, "rewards/generated": -200.93731689453125, "rewards/margins": 151.0426788330078, "rewards/real": -49.89463806152344, "step": 47 }, { "epoch": 0.2, "grad_norm": 28.363362753377597, "learning_rate": 4.957264957264958e-07, "logits/generated": -2.1406655311584473, "logits/oppo_generated": -2.7121076583862305, "logits/oppo_real": -2.932806968688965, "logits/real": -2.1540122032165527, "logps/generated": -254.42098999023438, "logps/oppo_gen": -72.10917663574219, "logps/oppo_real": -299.3392333984375, "logps/real": -366.06121826171875, "loss": 0.8697, "loss/gen": 0.38246485590934753, "loss/real": 0.6711124181747437, "rewards/accuracies": 1.0, "rewards/generated": -182.31182861328125, "rewards/margins": 115.58984375, "rewards/real": -66.72196960449219, "step": 48 }, { "epoch": 0.21, "grad_norm": 29.28652212909496, "learning_rate": 4.953703703703703e-07, "logits/generated": -2.362544059753418, "logits/oppo_generated": -2.814209461212158, "logits/oppo_real": -3.157527208328247, "logits/real": -2.4037039279937744, "logps/generated": -309.79132080078125, "logps/oppo_gen": -80.24543762207031, "logps/oppo_real": -294.9969482421875, "logps/real": -339.64544677734375, "loss": 0.7412, "loss/gen": 0.41209107637405396, "loss/real": 0.4473879337310791, "rewards/accuracies": 1.0, "rewards/generated": -229.5458984375, "rewards/margins": 184.89743041992188, "rewards/real": -44.648468017578125, "step": 49 }, { "epoch": 0.21, "grad_norm": 37.235111000384265, "learning_rate": 4.95014245014245e-07, "logits/generated": -2.274956464767456, "logits/oppo_generated": -2.9343652725219727, "logits/oppo_real": -2.7617945671081543, "logits/real": -2.470684051513672, "logps/generated": -271.6064758300781, "logps/oppo_gen": -82.74765014648438, "logps/oppo_real": -315.32562255859375, "logps/real": -348.8078308105469, "loss": 0.8359, "loss/gen": 0.3331334590911865, "loss/real": 0.33817416429519653, "rewards/accuracies": 1.0, "rewards/generated": -188.85882568359375, "rewards/margins": 155.37660217285156, "rewards/real": -33.482208251953125, "step": 50 }, { "epoch": 0.21, "grad_norm": 24.169072033670428, "learning_rate": 4.946581196581196e-07, "logits/generated": -2.1854918003082275, "logits/oppo_generated": -2.805569648742676, "logits/oppo_real": -2.7846250534057617, "logits/real": -2.2574825286865234, "logps/generated": -207.7048797607422, "logps/oppo_gen": -45.456573486328125, "logps/oppo_real": -161.39598083496094, "logps/real": -193.92062377929688, "loss": 0.7238, "loss/gen": 0.49297964572906494, "loss/real": 0.37008020281791687, "rewards/accuracies": 1.0, "rewards/generated": -162.24830627441406, "rewards/margins": 129.72366333007812, "rewards/real": -32.52463150024414, "step": 51 }, { "epoch": 0.22, "grad_norm": 28.44118708927029, "learning_rate": 4.943019943019943e-07, "logits/generated": -1.998687982559204, "logits/oppo_generated": -2.7444612979888916, "logits/oppo_real": -2.7595162391662598, "logits/real": -2.086247444152832, "logps/generated": -203.22409057617188, "logps/oppo_gen": -50.193504333496094, "logps/oppo_real": -148.25294494628906, "logps/real": -181.40602111816406, "loss": 0.7164, "loss/gen": 0.5076989531517029, "loss/real": 0.35455718636512756, "rewards/accuracies": 1.0, "rewards/generated": -153.0305938720703, "rewards/margins": 119.87752532958984, "rewards/real": -33.15306854248047, "step": 52 }, { "epoch": 0.22, "grad_norm": 28.44118708927029, "learning_rate": 4.943019943019943e-07, "logits/generated": -1.962064504623413, "logits/oppo_generated": -2.660369396209717, "logits/oppo_real": -2.6082496643066406, "logits/real": -2.004966974258423, "logps/generated": -199.8478240966797, "logps/oppo_gen": -55.80210876464844, "logps/oppo_real": -201.49038696289062, "logps/real": -236.99850463867188, "loss": 0.61, "loss/gen": 0.625639796257019, "loss/real": 0.3782804012298584, "rewards/accuracies": 1.0, "rewards/generated": -144.04571533203125, "rewards/margins": 108.53761291503906, "rewards/real": -35.50811767578125, "step": 53 }, { "epoch": 0.23, "grad_norm": 34.77088794433499, "learning_rate": 4.93945868945869e-07, "logits/generated": -2.1952624320983887, "logits/oppo_generated": -2.746832847595215, "logits/oppo_real": -2.973560333251953, "logits/real": -2.191551685333252, "logps/generated": -218.537109375, "logps/oppo_gen": -77.28608703613281, "logps/oppo_real": -547.3628540039062, "logps/real": -573.14208984375, "loss": 0.6871, "loss/gen": 0.6249206066131592, "loss/real": 0.30037403106689453, "rewards/accuracies": 0.875, "rewards/generated": -141.2510223388672, "rewards/margins": 115.47174835205078, "rewards/real": -25.77927017211914, "step": 54 }, { "epoch": 0.23, "grad_norm": 29.88689616704953, "learning_rate": 4.935897435897436e-07, "logits/generated": -1.6832900047302246, "logits/oppo_generated": -2.664555072784424, "logits/oppo_real": -2.6400251388549805, "logits/real": -2.0845460891723633, "logps/generated": -307.5957336425781, "logps/oppo_gen": -78.57785034179688, "logps/oppo_real": -398.628662109375, "logps/real": -400.03839111328125, "loss": 0.6084, "loss/gen": 0.14119790494441986, "loss/real": 0.09441090375185013, "rewards/accuracies": 1.0, "rewards/generated": -229.01788330078125, "rewards/margins": 227.60816955566406, "rewards/real": -1.4097027778625488, "step": 55 }, { "epoch": 0.23, "grad_norm": 26.655489804517757, "learning_rate": 4.932336182336182e-07, "logits/generated": -2.235076665878296, "logits/oppo_generated": -2.638930320739746, "logits/oppo_real": -3.1015210151672363, "logits/real": -2.072552442550659, "logps/generated": -260.4788818359375, "logps/oppo_gen": -84.6130599975586, "logps/oppo_real": -310.54534912109375, "logps/real": -339.3001708984375, "loss": 0.6508, "loss/gen": 0.5073614716529846, "loss/real": 0.32746076583862305, "rewards/accuracies": 0.75, "rewards/generated": -175.86582946777344, "rewards/margins": 147.1110076904297, "rewards/real": -28.754831314086914, "step": 56 }, { "epoch": 0.24, "grad_norm": 51.166719683061565, "learning_rate": 4.928774928774928e-07, "logits/generated": -2.112302780151367, "logits/oppo_generated": -2.9305167198181152, "logits/oppo_real": -2.7986156940460205, "logits/real": -2.261821746826172, "logps/generated": -329.22186279296875, "logps/oppo_gen": -55.247596740722656, "logps/oppo_real": -159.6094970703125, "logps/real": -214.50485229492188, "loss": 0.5733, "loss/gen": 0.1604616641998291, "loss/real": 0.5549860000610352, "rewards/accuracies": 1.0, "rewards/generated": -273.9742431640625, "rewards/margins": 219.07891845703125, "rewards/real": -54.895347595214844, "step": 57 }, { "epoch": 0.24, "grad_norm": 54.18406446361213, "learning_rate": 4.925213675213676e-07, "logits/generated": -1.972760558128357, "logits/oppo_generated": -2.733177900314331, "logits/oppo_real": -3.0261659622192383, "logits/real": -2.0272536277770996, "logps/generated": -270.2849426269531, "logps/oppo_gen": -77.4105453491211, "logps/oppo_real": -291.50042724609375, "logps/real": -314.6445617675781, "loss": 0.6405, "loss/gen": 0.24113653600215912, "loss/real": 0.25997018814086914, "rewards/accuracies": 1.0, "rewards/generated": -192.8743896484375, "rewards/margins": 169.73028564453125, "rewards/real": -23.144100189208984, "step": 58 }, { "epoch": 0.25, "grad_norm": 31.552650652260482, "learning_rate": 4.921652421652421e-07, "logits/generated": -1.6506314277648926, "logits/oppo_generated": -2.70068359375, "logits/oppo_real": -2.622352361679077, "logits/real": -1.8686270713806152, "logps/generated": -331.11773681640625, "logps/oppo_gen": -66.53448486328125, "logps/oppo_real": -142.07913208007812, "logps/real": -227.437744140625, "loss": 0.6586, "loss/gen": 0.03464512526988983, "loss/real": 0.853585958480835, "rewards/accuracies": 1.0, "rewards/generated": -264.5832824707031, "rewards/margins": 179.22467041015625, "rewards/real": -85.35860443115234, "step": 59 }, { "epoch": 0.25, "grad_norm": 27.14020221317216, "learning_rate": 4.918091168091168e-07, "logits/generated": -2.1259684562683105, "logits/oppo_generated": -3.0608558654785156, "logits/oppo_real": -3.0881457328796387, "logits/real": -2.3352560997009277, "logps/generated": -275.365478515625, "logps/oppo_gen": -78.30126953125, "logps/oppo_real": -296.7585144042969, "logps/real": -317.5572509765625, "loss": 0.567, "loss/gen": 0.22492240369319916, "loss/real": 0.27364999055862427, "rewards/accuracies": 1.0, "rewards/generated": -197.064208984375, "rewards/margins": 176.26547241210938, "rewards/real": -20.798734664916992, "step": 60 }, { "epoch": 0.26, "grad_norm": 37.116987705997744, "learning_rate": 4.914529914529914e-07, "logits/generated": -2.0405826568603516, "logits/oppo_generated": -2.904336929321289, "logits/oppo_real": -3.0007967948913574, "logits/real": -2.274019956588745, "logps/generated": -340.4947509765625, "logps/oppo_gen": -78.76142883300781, "logps/oppo_real": -321.17315673828125, "logps/real": -368.8492431640625, "loss": 0.5723, "loss/gen": 0.07833881676197052, "loss/real": 0.5737317800521851, "rewards/accuracies": 0.875, "rewards/generated": -261.7333068847656, "rewards/margins": 214.0572509765625, "rewards/real": -47.676055908203125, "step": 61 }, { "epoch": 0.26, "grad_norm": 28.903528165773942, "learning_rate": 4.910968660968661e-07, "logits/generated": -2.1163697242736816, "logits/oppo_generated": -3.0246148109436035, "logits/oppo_real": -3.155604839324951, "logits/real": -2.3015458583831787, "logps/generated": -325.52911376953125, "logps/oppo_gen": -99.78816986083984, "logps/oppo_real": -357.6624755859375, "logps/real": -369.4306640625, "loss": 0.4923, "loss/gen": 0.08446104824542999, "loss/real": 0.1771288365125656, "rewards/accuracies": 1.0, "rewards/generated": -225.74095153808594, "rewards/margins": 213.97276306152344, "rewards/real": -11.768176078796387, "step": 62 }, { "epoch": 0.26, "grad_norm": 42.85362560553147, "learning_rate": 4.907407407407407e-07, "logits/generated": -1.962327480316162, "logits/oppo_generated": -2.718918800354004, "logits/oppo_real": -2.8950438499450684, "logits/real": -2.042083740234375, "logps/generated": -230.06312561035156, "logps/oppo_gen": -73.73533630371094, "logps/oppo_real": -276.2977294921875, "logps/real": -287.68817138671875, "loss": 0.4966, "loss/gen": 0.5271965861320496, "loss/real": 0.24724090099334717, "rewards/accuracies": 0.875, "rewards/generated": -156.3278045654297, "rewards/margins": 144.93739318847656, "rewards/real": -11.390399932861328, "step": 63 }, { "epoch": 0.27, "grad_norm": 60.4540336582008, "learning_rate": 4.903846153846153e-07, "logits/generated": -2.031083822250366, "logits/oppo_generated": -2.7741386890411377, "logits/oppo_real": -2.8905487060546875, "logits/real": -2.146413803100586, "logps/generated": -242.85926818847656, "logps/oppo_gen": -70.42605590820312, "logps/oppo_real": -291.8798522949219, "logps/real": -347.0901794433594, "loss": 0.5204, "loss/gen": 0.3330785036087036, "loss/real": 0.5649424195289612, "rewards/accuracies": 1.0, "rewards/generated": -172.43319702148438, "rewards/margins": 117.2228775024414, "rewards/real": -55.21034240722656, "step": 64 }, { "epoch": 0.27, "grad_norm": 60.4540336582008, "learning_rate": 4.903846153846153e-07, "logits/generated": -1.8672581911087036, "logits/oppo_generated": -2.731257438659668, "logits/oppo_real": -2.804780960083008, "logits/real": -2.0895280838012695, "logps/generated": -339.7601013183594, "logps/oppo_gen": -143.67832946777344, "logps/oppo_real": -309.55450439453125, "logps/real": -320.901123046875, "loss": 0.3716, "loss/gen": 0.2359689623117447, "loss/real": 0.16374921798706055, "rewards/accuracies": 1.0, "rewards/generated": -196.081787109375, "rewards/margins": 184.73516845703125, "rewards/real": -11.34660816192627, "step": 65 }, { "epoch": 0.28, "grad_norm": 36.390479081394865, "learning_rate": 4.9002849002849e-07, "logits/generated": -1.936166524887085, "logits/oppo_generated": -2.710496664047241, "logits/oppo_real": -2.980191707611084, "logits/real": -1.961893081665039, "logps/generated": -314.8875732421875, "logps/oppo_gen": -71.51214599609375, "logps/oppo_real": -284.34765625, "logps/real": -309.43060302734375, "loss": 0.4212, "loss/gen": 0.0, "loss/real": 0.35151734948158264, "rewards/accuracies": 1.0, "rewards/generated": -243.37542724609375, "rewards/margins": 218.29248046875, "rewards/real": -25.082937240600586, "step": 66 }, { "epoch": 0.28, "grad_norm": 68.84343437853207, "learning_rate": 4.896723646723647e-07, "logits/generated": -2.055253267288208, "logits/oppo_generated": -3.0934062004089355, "logits/oppo_real": -3.077010154724121, "logits/real": -2.3065128326416016, "logps/generated": -340.2142333984375, "logps/oppo_gen": -109.1805419921875, "logps/oppo_real": -348.23834228515625, "logps/real": -346.0427551269531, "loss": 0.5169, "loss/gen": 0.03457939624786377, "loss/real": 0.09117183089256287, "rewards/accuracies": 1.0, "rewards/generated": -231.03366088867188, "rewards/margins": 233.22927856445312, "rewards/real": 2.195611000061035, "step": 67 }, { "epoch": 0.28, "grad_norm": 34.09506041248864, "learning_rate": 4.893162393162393e-07, "logits/generated": -1.9534183740615845, "logits/oppo_generated": -2.838265895843506, "logits/oppo_real": -3.01387357711792, "logits/real": -2.075807809829712, "logps/generated": -265.85076904296875, "logps/oppo_gen": -75.5096206665039, "logps/oppo_real": -242.11915588378906, "logps/real": -281.80438232421875, "loss": 0.5159, "loss/gen": 0.18575912714004517, "loss/real": 0.5187460780143738, "rewards/accuracies": 0.875, "rewards/generated": -190.34115600585938, "rewards/margins": 150.65594482421875, "rewards/real": -39.68519973754883, "step": 68 }, { "epoch": 0.29, "grad_norm": 34.22783840494459, "learning_rate": 4.889601139601139e-07, "logits/generated": -1.8958648443222046, "logits/oppo_generated": -2.786154270172119, "logits/oppo_real": -2.980445146560669, "logits/real": -1.9751079082489014, "logps/generated": -315.6554870605469, "logps/oppo_gen": -78.40753173828125, "logps/oppo_real": -188.29739379882812, "logps/real": -222.51780700683594, "loss": 0.4878, "loss/gen": 0.023022428154945374, "loss/real": 0.41188403964042664, "rewards/accuracies": 1.0, "rewards/generated": -237.24795532226562, "rewards/margins": 203.02752685546875, "rewards/real": -34.220428466796875, "step": 69 }, { "epoch": 0.29, "grad_norm": 35.71409671406464, "learning_rate": 4.886039886039886e-07, "logits/generated": -1.8381710052490234, "logits/oppo_generated": -2.8353500366210938, "logits/oppo_real": -2.788581371307373, "logits/real": -1.96791410446167, "logps/generated": -293.2041015625, "logps/oppo_gen": -74.27359008789062, "logps/oppo_real": -262.4258728027344, "logps/real": -312.2054443359375, "loss": 0.458, "loss/gen": 0.1743556559085846, "loss/real": 0.5692518949508667, "rewards/accuracies": 0.875, "rewards/generated": -218.93051147460938, "rewards/margins": 169.15093994140625, "rewards/real": -49.77956771850586, "step": 70 }, { "epoch": 0.3, "grad_norm": 47.1891271449592, "learning_rate": 4.882478632478633e-07, "logits/generated": -1.840759515762329, "logits/oppo_generated": -2.8188014030456543, "logits/oppo_real": -2.757133960723877, "logits/real": -1.9096312522888184, "logps/generated": -285.27850341796875, "logps/oppo_gen": -55.317054748535156, "logps/oppo_real": -178.10824584960938, "logps/real": -215.8996124267578, "loss": 0.4616, "loss/gen": 0.0, "loss/real": 0.41009002923965454, "rewards/accuracies": 1.0, "rewards/generated": -229.96144104003906, "rewards/margins": 192.17007446289062, "rewards/real": -37.79136276245117, "step": 71 }, { "epoch": 0.3, "grad_norm": 38.070165804919505, "learning_rate": 4.878917378917379e-07, "logits/generated": -1.9046831130981445, "logits/oppo_generated": -2.865746259689331, "logits/oppo_real": -2.85042142868042, "logits/real": -2.0951907634735107, "logps/generated": -316.7037658691406, "logps/oppo_gen": -101.81581115722656, "logps/oppo_real": -463.47314453125, "logps/real": -466.7184143066406, "loss": 0.4761, "loss/gen": 0.05637046694755554, "loss/real": 0.1776004135608673, "rewards/accuracies": 1.0, "rewards/generated": -214.88796997070312, "rewards/margins": 211.6427001953125, "rewards/real": -3.2452640533447266, "step": 72 }, { "epoch": 0.31, "grad_norm": 36.46849560751033, "learning_rate": 4.875356125356125e-07, "logits/generated": -1.9553462266921997, "logits/oppo_generated": -2.9923882484436035, "logits/oppo_real": -2.813816547393799, "logits/real": -2.1159887313842773, "logps/generated": -345.8742980957031, "logps/oppo_gen": -78.51251220703125, "logps/oppo_real": -286.4658508300781, "logps/real": -274.63427734375, "loss": 0.4137, "loss/gen": 0.007427394390106201, "loss/real": 0.03321278840303421, "rewards/accuracies": 1.0, "rewards/generated": -267.3617858886719, "rewards/margins": 279.1933898925781, "rewards/real": 11.83156967163086, "step": 73 }, { "epoch": 0.31, "grad_norm": 31.978947389991983, "learning_rate": 4.871794871794871e-07, "logits/generated": -2.1326825618743896, "logits/oppo_generated": -2.7725887298583984, "logits/oppo_real": -3.063380002975464, "logits/real": -2.051950216293335, "logps/generated": -261.63037109375, "logps/oppo_gen": -79.40229034423828, "logps/oppo_real": -383.419677734375, "logps/real": -401.0813293457031, "loss": 0.4048, "loss/gen": 0.24421586096286774, "loss/real": 0.2781957685947418, "rewards/accuracies": 1.0, "rewards/generated": -182.2280731201172, "rewards/margins": 164.56642150878906, "rewards/real": -17.661649703979492, "step": 74 }, { "epoch": 0.31, "grad_norm": 43.62784005350683, "learning_rate": 4.868233618233618e-07, "logits/generated": -2.1139259338378906, "logits/oppo_generated": -2.8321666717529297, "logits/oppo_real": -3.1668171882629395, "logits/real": -2.0823147296905518, "logps/generated": -363.62158203125, "logps/oppo_gen": -99.83964538574219, "logps/oppo_real": -322.6613464355469, "logps/real": -310.8186950683594, "loss": 0.3929, "loss/gen": 0.28169000148773193, "loss/real": 0.1750582456588745, "rewards/accuracies": 1.0, "rewards/generated": -263.7819519042969, "rewards/margins": 275.6246337890625, "rewards/real": 11.842658042907715, "step": 75 }, { "epoch": 0.32, "grad_norm": 32.57134409346178, "learning_rate": 4.864672364672365e-07, "logits/generated": -2.2618470191955566, "logits/oppo_generated": -3.000812530517578, "logits/oppo_real": -3.1619484424591064, "logits/real": -2.332085609436035, "logps/generated": -293.44439697265625, "logps/oppo_gen": -83.82888793945312, "logps/oppo_real": -441.3746337890625, "logps/real": -436.41595458984375, "loss": 0.4213, "loss/gen": 0.061429619789123535, "loss/real": 0.08119938522577286, "rewards/accuracies": 1.0, "rewards/generated": -209.61550903320312, "rewards/margins": 214.57421875, "rewards/real": 4.958704948425293, "step": 76 }, { "epoch": 0.32, "grad_norm": 56.40902867117107, "learning_rate": 4.861111111111111e-07, "logits/generated": -1.467146635055542, "logits/oppo_generated": -2.4111037254333496, "logits/oppo_real": -2.622360944747925, "logits/real": -1.5239063501358032, "logps/generated": -283.9698791503906, "logps/oppo_gen": -94.29784393310547, "logps/oppo_real": -307.8828125, "logps/real": -318.51324462890625, "loss": 0.4075, "loss/gen": 0.1725415140390396, "loss/real": 0.24915428459644318, "rewards/accuracies": 1.0, "rewards/generated": -189.67201232910156, "rewards/margins": 179.04159545898438, "rewards/real": -10.630415916442871, "step": 77 }, { "epoch": 0.33, "grad_norm": 57.75006764065303, "learning_rate": 4.857549857549857e-07, "logits/generated": -1.9342730045318604, "logits/oppo_generated": -2.7816574573516846, "logits/oppo_real": -2.923349380493164, "logits/real": -2.000138759613037, "logps/generated": -264.1120300292969, "logps/oppo_gen": -70.22672271728516, "logps/oppo_real": -286.0644836425781, "logps/real": -369.57379150390625, "loss": 0.414, "loss/gen": 0.20479409396648407, "loss/real": 0.8575762510299683, "rewards/accuracies": 0.875, "rewards/generated": -193.8852996826172, "rewards/margins": 110.37598419189453, "rewards/real": -83.50932312011719, "step": 78 }, { "epoch": 0.33, "grad_norm": 115.49377688862721, "learning_rate": 4.853988603988603e-07, "logits/generated": -1.7300872802734375, "logits/oppo_generated": -2.624129056930542, "logits/oppo_real": -2.6314826011657715, "logits/real": -1.706296682357788, "logps/generated": -254.1842498779297, "logps/oppo_gen": -48.185340881347656, "logps/oppo_real": -148.66656494140625, "logps/real": -187.1585693359375, "loss": 0.3277, "loss/gen": 0.08585792779922485, "loss/real": 0.4419286251068115, "rewards/accuracies": 1.0, "rewards/generated": -205.9989013671875, "rewards/margins": 167.50689697265625, "rewards/real": -38.492000579833984, "step": 79 }, { "epoch": 0.33, "grad_norm": 63.99712937592885, "learning_rate": 4.850427350427351e-07, "logits/generated": -1.8802506923675537, "logits/oppo_generated": -2.668670177459717, "logits/oppo_real": -2.9500231742858887, "logits/real": -1.7524856328964233, "logps/generated": -292.76727294921875, "logps/oppo_gen": -76.79248809814453, "logps/oppo_real": -287.1414794921875, "logps/real": -329.4079284667969, "loss": 0.34, "loss/gen": 0.22407092154026031, "loss/real": 0.4708248972892761, "rewards/accuracies": 1.0, "rewards/generated": -215.97479248046875, "rewards/margins": 173.7083740234375, "rewards/real": -42.266422271728516, "step": 80 }, { "epoch": 0.34, "grad_norm": 158.62792427367572, "learning_rate": 4.846866096866097e-07, "logits/generated": -2.008450984954834, "logits/oppo_generated": -2.8624868392944336, "logits/oppo_real": -3.0077338218688965, "logits/real": -1.9637665748596191, "logps/generated": -250.85487365722656, "logps/oppo_gen": -103.01863861083984, "logps/oppo_real": -484.10565185546875, "logps/real": -474.10980224609375, "loss": 0.554, "loss/gen": 0.6981667280197144, "loss/real": 0.05877792090177536, "rewards/accuracies": 1.0, "rewards/generated": -147.83624267578125, "rewards/margins": 157.83209228515625, "rewards/real": 9.995855331420898, "step": 81 }, { "epoch": 0.34, "grad_norm": 94.57272850331907, "learning_rate": 4.843304843304843e-07, "logits/generated": -1.8665781021118164, "logits/oppo_generated": -2.976921796798706, "logits/oppo_real": -3.0094780921936035, "logits/real": -1.9989147186279297, "logps/generated": -280.32012939453125, "logps/oppo_gen": -66.51390075683594, "logps/oppo_real": -174.39071655273438, "logps/real": -190.4137420654297, "loss": 0.5, "loss/gen": 0.09350240230560303, "loss/real": 0.278587281703949, "rewards/accuracies": 1.0, "rewards/generated": -213.8062286376953, "rewards/margins": 197.783203125, "rewards/real": -16.02302360534668, "step": 82 }, { "epoch": 0.35, "grad_norm": 46.788289592255346, "learning_rate": 4.839743589743589e-07, "logits/generated": -1.8179612159729004, "logits/oppo_generated": -3.01529598236084, "logits/oppo_real": -2.9185380935668945, "logits/real": -2.0011558532714844, "logps/generated": -466.2247619628906, "logps/oppo_gen": -86.220458984375, "logps/oppo_real": -329.8023376464844, "logps/real": -332.1396179199219, "loss": 0.4253, "loss/gen": 0.05652913451194763, "loss/real": 0.2249125838279724, "rewards/accuracies": 1.0, "rewards/generated": -380.00433349609375, "rewards/margins": 377.66705322265625, "rewards/real": -2.3372955322265625, "step": 83 }, { "epoch": 0.35, "grad_norm": 45.30856125122767, "learning_rate": 4.836182336182337e-07, "logits/generated": -1.641934871673584, "logits/oppo_generated": -2.864108085632324, "logits/oppo_real": -2.8596436977386475, "logits/real": -1.7771339416503906, "logps/generated": -308.81744384765625, "logps/oppo_gen": -79.35113525390625, "logps/oppo_real": -357.43438720703125, "logps/real": -351.2286376953125, "loss": 0.4106, "loss/gen": 0.02170167863368988, "loss/real": 0.11313143372535706, "rewards/accuracies": 1.0, "rewards/generated": -229.46632385253906, "rewards/margins": 235.67205810546875, "rewards/real": 6.2057342529296875, "step": 84 }, { "epoch": 0.36, "grad_norm": 60.23657786179424, "learning_rate": 4.832621082621082e-07, "logits/generated": -1.7438234090805054, "logits/oppo_generated": -2.635812282562256, "logits/oppo_real": -2.784547805786133, "logits/real": -1.6820318698883057, "logps/generated": -362.5193786621094, "logps/oppo_gen": -87.48421478271484, "logps/oppo_real": -250.10626220703125, "logps/real": -273.316162109375, "loss": 0.3701, "loss/gen": 0.0328507125377655, "loss/real": 0.2887704372406006, "rewards/accuracies": 1.0, "rewards/generated": -275.03515625, "rewards/margins": 251.82522583007812, "rewards/real": -23.209918975830078, "step": 85 }, { "epoch": 0.36, "grad_norm": 78.00425982458425, "learning_rate": 4.829059829059829e-07, "logits/generated": -1.6737346649169922, "logits/oppo_generated": -2.9845218658447266, "logits/oppo_real": -3.016307830810547, "logits/real": -2.0023183822631836, "logps/generated": -242.6670379638672, "logps/oppo_gen": -55.523197174072266, "logps/oppo_real": -291.81378173828125, "logps/real": -357.2690734863281, "loss": 0.4034, "loss/gen": 0.28107643127441406, "loss/real": 0.7271257638931274, "rewards/accuracies": 0.75, "rewards/generated": -187.14382934570312, "rewards/margins": 121.68854522705078, "rewards/real": -65.45529174804688, "step": 86 }, { "epoch": 0.36, "grad_norm": 36.09174608885233, "learning_rate": 4.825498575498575e-07, "logits/generated": -1.8338725566864014, "logits/oppo_generated": -2.8317785263061523, "logits/oppo_real": -2.849785327911377, "logits/real": -1.9712032079696655, "logps/generated": -256.17181396484375, "logps/oppo_gen": -65.48351287841797, "logps/oppo_real": -259.8980712890625, "logps/real": -279.2626953125, "loss": 0.3141, "loss/gen": 0.18348117172718048, "loss/real": 0.2724674642086029, "rewards/accuracies": 1.0, "rewards/generated": -190.68829345703125, "rewards/margins": 171.32366943359375, "rewards/real": -19.364639282226562, "step": 87 }, { "epoch": 0.37, "grad_norm": 43.09703010540576, "learning_rate": 4.821937321937321e-07, "logits/generated": -1.9401007890701294, "logits/oppo_generated": -2.9616637229919434, "logits/oppo_real": -2.8549320697784424, "logits/real": -2.2171854972839355, "logps/generated": -271.0581359863281, "logps/oppo_gen": -66.1073226928711, "logps/oppo_real": -297.0393981933594, "logps/real": -283.0654602050781, "loss": 0.3459, "loss/gen": 0.11610506474971771, "loss/real": 0.01628967374563217, "rewards/accuracies": 1.0, "rewards/generated": -204.95083618164062, "rewards/margins": 218.9247589111328, "rewards/real": 13.973949432373047, "step": 88 }, { "epoch": 0.37, "grad_norm": 39.01662618859587, "learning_rate": 4.818376068376069e-07, "logits/generated": -2.057642936706543, "logits/oppo_generated": -2.944060802459717, "logits/oppo_real": -2.977362632751465, "logits/real": -2.0833921432495117, "logps/generated": -284.1174621582031, "logps/oppo_gen": -49.032493591308594, "logps/oppo_real": -197.13412475585938, "logps/real": -235.72854614257812, "loss": 0.3748, "loss/gen": 0.1771981120109558, "loss/real": 0.44538283348083496, "rewards/accuracies": 1.0, "rewards/generated": -235.0849609375, "rewards/margins": 196.49057006835938, "rewards/real": -38.594398498535156, "step": 89 }, { "epoch": 0.38, "grad_norm": 76.44660552368398, "learning_rate": 4.814814814814814e-07, "logits/generated": -2.012396812438965, "logits/oppo_generated": -2.9935152530670166, "logits/oppo_real": -2.782620906829834, "logits/real": -2.228747844696045, "logps/generated": -225.5810546875, "logps/oppo_gen": -79.41259002685547, "logps/oppo_real": -304.58465576171875, "logps/real": -294.3427734375, "loss": 0.4461, "loss/gen": 0.6170589327812195, "loss/real": 0.013846360146999359, "rewards/accuracies": 1.0, "rewards/generated": -146.16847229003906, "rewards/margins": 156.41033935546875, "rewards/real": 10.241872787475586, "step": 90 }, { "epoch": 0.38, "grad_norm": 44.887227127320834, "learning_rate": 4.811253561253561e-07, "logits/generated": -1.8049852848052979, "logits/oppo_generated": -3.0348973274230957, "logits/oppo_real": -2.8550362586975098, "logits/real": -2.1674275398254395, "logps/generated": -334.69403076171875, "logps/oppo_gen": -147.11734008789062, "logps/oppo_real": -324.0049743652344, "logps/real": -330.11480712890625, "loss": 0.3668, "loss/gen": 0.32383447885513306, "loss/real": 0.19668863713741302, "rewards/accuracies": 1.0, "rewards/generated": -187.57669067382812, "rewards/margins": 181.46685791015625, "rewards/real": -6.109820365905762, "step": 91 }, { "epoch": 0.38, "grad_norm": 33.3576313215254, "learning_rate": 4.807692307692307e-07, "logits/generated": -1.867674469947815, "logits/oppo_generated": -2.8708338737487793, "logits/oppo_real": -2.8143606185913086, "logits/real": -2.028439998626709, "logps/generated": -307.528564453125, "logps/oppo_gen": -81.77798461914062, "logps/oppo_real": -330.5220031738281, "logps/real": -330.1318054199219, "loss": 0.355, "loss/gen": 0.2485622763633728, "loss/real": 0.1294267177581787, "rewards/accuracies": 1.0, "rewards/generated": -225.75057983398438, "rewards/margins": 226.1407470703125, "rewards/real": 0.3901691436767578, "step": 92 }, { "epoch": 0.39, "grad_norm": 48.87256477708762, "learning_rate": 4.804131054131054e-07, "logits/generated": -1.6193779706954956, "logits/oppo_generated": -2.7298922538757324, "logits/oppo_real": -2.698655605316162, "logits/real": -1.7858012914657593, "logps/generated": -299.572998046875, "logps/oppo_gen": -74.60616302490234, "logps/oppo_real": -251.41427612304688, "logps/real": -266.9776306152344, "loss": 0.3602, "loss/gen": 0.04555131494998932, "loss/real": 0.34169435501098633, "rewards/accuracies": 1.0, "rewards/generated": -224.9668426513672, "rewards/margins": 209.40345764160156, "rewards/real": -15.563373565673828, "step": 93 }, { "epoch": 0.39, "grad_norm": 63.47711970909282, "learning_rate": 4.8005698005698e-07, "logits/generated": -1.8660156726837158, "logits/oppo_generated": -2.9584808349609375, "logits/oppo_real": -2.8358330726623535, "logits/real": -2.125810384750366, "logps/generated": -329.8253479003906, "logps/oppo_gen": -83.23335266113281, "logps/oppo_real": -311.66064453125, "logps/real": -308.5706787109375, "loss": 0.3475, "loss/gen": 0.030205443501472473, "loss/real": 0.12542490661144257, "rewards/accuracies": 1.0, "rewards/generated": -246.59197998046875, "rewards/margins": 249.68197631835938, "rewards/real": 3.089975357055664, "step": 94 }, { "epoch": 0.4, "grad_norm": 41.84529030025362, "learning_rate": 4.797008547008547e-07, "logits/generated": -1.794067621231079, "logits/oppo_generated": -2.83894681930542, "logits/oppo_real": -2.731696605682373, "logits/real": -1.9485492706298828, "logps/generated": -330.9621887207031, "logps/oppo_gen": -103.72628021240234, "logps/oppo_real": -218.9561767578125, "logps/real": -219.8974609375, "loss": 0.307, "loss/gen": 0.06028883159160614, "loss/real": 0.14670495688915253, "rewards/accuracies": 1.0, "rewards/generated": -227.23593139648438, "rewards/margins": 226.29464721679688, "rewards/real": -0.9412956237792969, "step": 95 }, { "epoch": 0.4, "grad_norm": 32.90370323418684, "learning_rate": 4.793447293447293e-07, "logits/generated": -1.8011112213134766, "logits/oppo_generated": -2.7633142471313477, "logits/oppo_real": -2.9560418128967285, "logits/real": -1.7214103937149048, "logps/generated": -348.088134765625, "logps/oppo_gen": -74.91079711914062, "logps/oppo_real": -299.2713623046875, "logps/real": -282.55438232421875, "loss": 0.3138, "loss/gen": 0.033689409494400024, "loss/real": 0.08471229672431946, "rewards/accuracies": 1.0, "rewards/generated": -273.1773376464844, "rewards/margins": 289.8943176269531, "rewards/real": 16.716989517211914, "step": 96 }, { "epoch": 0.41, "grad_norm": 63.98752090862131, "learning_rate": 4.78988603988604e-07, "logits/generated": -1.8813642263412476, "logits/oppo_generated": -2.8308515548706055, "logits/oppo_real": -3.085522174835205, "logits/real": -1.916734218597412, "logps/generated": -354.05621337890625, "logps/oppo_gen": -134.01483154296875, "logps/oppo_real": -442.37945556640625, "logps/real": -426.1463928222656, "loss": 0.3186, "loss/gen": 0.08905501663684845, "loss/real": 0.009031563997268677, "rewards/accuracies": 1.0, "rewards/generated": -220.04135131835938, "rewards/margins": 236.27442932128906, "rewards/real": 16.233068466186523, "step": 97 }, { "epoch": 0.41, "grad_norm": 28.016905994138163, "learning_rate": 4.786324786324786e-07, "logits/generated": -1.7203209400177002, "logits/oppo_generated": -2.8044867515563965, "logits/oppo_real": -2.8060150146484375, "logits/real": -1.783670425415039, "logps/generated": -276.6085205078125, "logps/oppo_gen": -51.423309326171875, "logps/oppo_real": -222.54879760742188, "logps/real": -233.36328125, "loss": 0.2702, "loss/gen": 0.014314472675323486, "loss/real": 0.26025110483169556, "rewards/accuracies": 1.0, "rewards/generated": -225.1852264404297, "rewards/margins": 214.37075805664062, "rewards/real": -10.814473152160645, "step": 98 }, { "epoch": 0.41, "grad_norm": 42.268631478097916, "learning_rate": 4.782763532763532e-07, "logits/generated": -1.8753924369812012, "logits/oppo_generated": -2.932793140411377, "logits/oppo_real": -2.9959638118743896, "logits/real": -2.070061683654785, "logps/generated": -297.700927734375, "logps/oppo_gen": -68.20332336425781, "logps/oppo_real": -376.541015625, "logps/real": -386.68438720703125, "loss": 0.2846, "loss/gen": 0.0, "loss/real": 0.2723137140274048, "rewards/accuracies": 1.0, "rewards/generated": -229.4976043701172, "rewards/margins": 219.35421752929688, "rewards/real": -10.143372535705566, "step": 99 }, { "epoch": 0.42, "grad_norm": 62.84870885768557, "learning_rate": 4.779202279202279e-07, "logits/generated": -1.745549201965332, "logits/oppo_generated": -2.780601739883423, "logits/oppo_real": -2.8726038932800293, "logits/real": -1.851616621017456, "logps/generated": -308.7665100097656, "logps/oppo_gen": -75.83106994628906, "logps/oppo_real": -327.609619140625, "logps/real": -323.2357177734375, "loss": 0.3766, "loss/gen": 0.17890962958335876, "loss/real": 0.07637953758239746, "rewards/accuracies": 1.0, "rewards/generated": -232.93545532226562, "rewards/margins": 237.3093719482422, "rewards/real": 4.373929977416992, "step": 100 }, { "epoch": 0.42, "grad_norm": 80.61028208687152, "learning_rate": 4.775641025641026e-07, "logits/generated": -1.832048773765564, "logits/oppo_generated": -2.91953706741333, "logits/oppo_real": -2.820370674133301, "logits/real": -2.110717535018921, "logps/generated": -299.78704833984375, "logps/oppo_gen": -75.91517639160156, "logps/oppo_real": -531.0400390625, "logps/real": -533.7149047851562, "loss": 0.3108, "loss/gen": 0.07047438621520996, "loss/real": 0.11644826829433441, "rewards/accuracies": 1.0, "rewards/generated": -223.87188720703125, "rewards/margins": 221.1970672607422, "rewards/real": -2.6748085021972656, "step": 101 }, { "epoch": 0.43, "grad_norm": 21.048619255395202, "learning_rate": 4.772079772079772e-07, "logits/generated": -1.83795964717865, "logits/oppo_generated": -2.927794933319092, "logits/oppo_real": -2.8259315490722656, "logits/real": -2.103085517883301, "logps/generated": -293.03070068359375, "logps/oppo_gen": -75.32722473144531, "logps/oppo_real": -334.3116149902344, "logps/real": -329.84771728515625, "loss": 0.2749, "loss/gen": 0.1955680400133133, "loss/real": 0.08636181056499481, "rewards/accuracies": 1.0, "rewards/generated": -217.70347595214844, "rewards/margins": 222.16734313964844, "rewards/real": 4.463872909545898, "step": 102 }, { "epoch": 0.43, "grad_norm": 27.770455184464364, "learning_rate": 4.768518518518518e-07, "logits/generated": -1.8320189714431763, "logits/oppo_generated": -2.798323154449463, "logits/oppo_real": -3.0827927589416504, "logits/real": -2.0010604858398438, "logps/generated": -309.42236328125, "logps/oppo_gen": -85.98326110839844, "logps/oppo_real": -484.7052001953125, "logps/real": -477.567626953125, "loss": 0.2701, "loss/gen": 0.009465828537940979, "loss/real": 0.04307375103235245, "rewards/accuracies": 1.0, "rewards/generated": -223.43910217285156, "rewards/margins": 230.57669067382812, "rewards/real": 7.137579441070557, "step": 103 }, { "epoch": 0.44, "grad_norm": 35.29567146022477, "learning_rate": 4.764957264957264e-07, "logits/generated": -1.6535446643829346, "logits/oppo_generated": -2.820817232131958, "logits/oppo_real": -2.7580766677856445, "logits/real": -1.9198226928710938, "logps/generated": -388.9769287109375, "logps/oppo_gen": -98.39456176757812, "logps/oppo_real": -435.86871337890625, "logps/real": -446.45001220703125, "loss": 0.3203, "loss/gen": 0.04320457577705383, "loss/real": 0.25816553831100464, "rewards/accuracies": 1.0, "rewards/generated": -290.58233642578125, "rewards/margins": 280.00103759765625, "rewards/real": -10.581292152404785, "step": 104 }, { "epoch": 0.44, "grad_norm": 23.55976183940932, "learning_rate": 4.761396011396011e-07, "logits/generated": -1.8841466903686523, "logits/oppo_generated": -2.991581439971924, "logits/oppo_real": -3.002182960510254, "logits/real": -2.131711483001709, "logps/generated": -322.656494140625, "logps/oppo_gen": -81.12940216064453, "logps/oppo_real": -296.61138916015625, "logps/real": -295.33599853515625, "loss": 0.2212, "loss/gen": 0.0, "loss/real": 0.21376575529575348, "rewards/accuracies": 1.0, "rewards/generated": -241.52708435058594, "rewards/margins": 242.80247497558594, "rewards/real": 1.2753915786743164, "step": 105 }, { "epoch": 0.44, "grad_norm": 23.497506516649803, "learning_rate": 4.7578347578347577e-07, "logits/generated": -1.910527229309082, "logits/oppo_generated": -2.8433456420898438, "logits/oppo_real": -3.012195110321045, "logits/real": -1.9440618753433228, "logps/generated": -244.58482360839844, "logps/oppo_gen": -63.396881103515625, "logps/oppo_real": -288.55780029296875, "logps/real": -273.79388427734375, "loss": 0.1928, "loss/gen": 0.48318159580230713, "loss/real": 0.007603831589221954, "rewards/accuracies": 1.0, "rewards/generated": -181.18792724609375, "rewards/margins": 195.951904296875, "rewards/real": 14.763958930969238, "step": 106 }, { "epoch": 0.45, "grad_norm": 60.16537065598345, "learning_rate": 4.754273504273504e-07, "logits/generated": -1.7519464492797852, "logits/oppo_generated": -2.75607967376709, "logits/oppo_real": -3.044626235961914, "logits/real": -1.8694126605987549, "logps/generated": -350.42431640625, "logps/oppo_gen": -89.79308319091797, "logps/oppo_real": -237.51071166992188, "logps/real": -241.78152465820312, "loss": 0.2889, "loss/gen": 0.0006802082061767578, "loss/real": 0.14765188097953796, "rewards/accuracies": 1.0, "rewards/generated": -260.6312561035156, "rewards/margins": 256.36041259765625, "rewards/real": -4.270831108093262, "step": 107 }, { "epoch": 0.45, "grad_norm": 60.16537065598345, "learning_rate": 4.754273504273504e-07, "logits/generated": -2.0505292415618896, "logits/oppo_generated": -2.9334537982940674, "logits/oppo_real": -3.0197911262512207, "logits/real": -2.080873966217041, "logps/generated": -322.20416259765625, "logps/oppo_gen": -86.25882720947266, "logps/oppo_real": -171.73361206054688, "logps/real": -155.16622924804688, "loss": 0.2352, "loss/gen": 0.0, "loss/real": 0.036911122500896454, "rewards/accuracies": 1.0, "rewards/generated": -235.94532775878906, "rewards/margins": 252.5127410888672, "rewards/real": 16.567398071289062, "step": 108 }, { "epoch": 0.46, "grad_norm": 61.99013711558779, "learning_rate": 4.7507122507122507e-07, "logits/generated": -1.7211092710494995, "logits/oppo_generated": -2.8885016441345215, "logits/oppo_real": -2.9670629501342773, "logits/real": -2.023648977279663, "logps/generated": -242.21588134765625, "logps/oppo_gen": -52.36747741699219, "logps/oppo_real": -234.88699340820312, "logps/real": -226.70179748535156, "loss": 0.2528, "loss/gen": 0.2596120834350586, "loss/real": 0.1372058391571045, "rewards/accuracies": 1.0, "rewards/generated": -189.84840393066406, "rewards/margins": 198.03363037109375, "rewards/real": 8.185225486755371, "step": 109 }, { "epoch": 0.46, "grad_norm": 52.08842110566656, "learning_rate": 4.747150997150997e-07, "logits/generated": -1.8464903831481934, "logits/oppo_generated": -2.902094841003418, "logits/oppo_real": -2.738150119781494, "logits/real": -2.1050400733947754, "logps/generated": -330.8840026855469, "logps/oppo_gen": -71.77503967285156, "logps/oppo_real": -226.59805297851562, "logps/real": -215.78268432617188, "loss": 0.2926, "loss/gen": 0.0, "loss/real": 0.001055300235748291, "rewards/accuracies": 1.0, "rewards/generated": -259.1089782714844, "rewards/margins": 269.92437744140625, "rewards/real": 10.815394401550293, "step": 110 }, { "epoch": 0.46, "grad_norm": 48.25866937579066, "learning_rate": 4.743589743589743e-07, "logits/generated": -1.7407563924789429, "logits/oppo_generated": -2.78233003616333, "logits/oppo_real": -2.810633420944214, "logits/real": -1.92085862159729, "logps/generated": -262.89617919921875, "logps/oppo_gen": -51.96064758300781, "logps/oppo_real": -160.8415069580078, "logps/real": -168.8170928955078, "loss": 0.2552, "loss/gen": 0.11834511160850525, "loss/real": 0.24989712238311768, "rewards/accuracies": 1.0, "rewards/generated": -210.93551635742188, "rewards/margins": 202.95993041992188, "rewards/real": -7.9755964279174805, "step": 111 }, { "epoch": 0.47, "grad_norm": 48.57264161718894, "learning_rate": 4.74002849002849e-07, "logits/generated": -1.7602338790893555, "logits/oppo_generated": -2.7906460762023926, "logits/oppo_real": -2.7454147338867188, "logits/real": -1.9760286808013916, "logps/generated": -233.21107482910156, "logps/oppo_gen": -67.77021789550781, "logps/oppo_real": -355.9058837890625, "logps/real": -337.13189697265625, "loss": 0.3077, "loss/gen": 0.4533146023750305, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -165.44085693359375, "rewards/margins": 184.21485900878906, "rewards/real": 18.774002075195312, "step": 112 }, { "epoch": 0.47, "grad_norm": 83.06293794560996, "learning_rate": 4.7364672364672366e-07, "logits/generated": -1.7146689891815186, "logits/oppo_generated": -2.784420967102051, "logits/oppo_real": -2.58797550201416, "logits/real": -1.975735068321228, "logps/generated": -264.473388671875, "logps/oppo_gen": -53.4489631652832, "logps/oppo_real": -213.77337646484375, "logps/real": -224.57752990722656, "loss": 0.2609, "loss/gen": 0.06298929452896118, "loss/real": 0.2708776891231537, "rewards/accuracies": 0.875, "rewards/generated": -211.02444458007812, "rewards/margins": 200.22027587890625, "rewards/real": -10.804159164428711, "step": 113 }, { "epoch": 0.48, "grad_norm": 54.351966206097366, "learning_rate": 4.7329059829059823e-07, "logits/generated": -1.9484028816223145, "logits/oppo_generated": -2.9693868160247803, "logits/oppo_real": -2.897064208984375, "logits/real": -2.2127110958099365, "logps/generated": -297.3500061035156, "logps/oppo_gen": -65.07535552978516, "logps/oppo_real": -380.3414306640625, "logps/real": -407.437255859375, "loss": 0.3122, "loss/gen": 0.0, "loss/real": 0.37634211778640747, "rewards/accuracies": 1.0, "rewards/generated": -232.27462768554688, "rewards/margins": 205.17877197265625, "rewards/real": -27.095857620239258, "step": 114 }, { "epoch": 0.48, "grad_norm": 44.581477978090156, "learning_rate": 4.729344729344729e-07, "logits/generated": -2.0135183334350586, "logits/oppo_generated": -2.8074076175689697, "logits/oppo_real": -2.9744620323181152, "logits/real": -2.006603479385376, "logps/generated": -310.1141357421875, "logps/oppo_gen": -81.67523193359375, "logps/oppo_real": -332.10321044921875, "logps/real": -344.72015380859375, "loss": 0.2123, "loss/gen": 0.24567674100399017, "loss/real": 0.2426839917898178, "rewards/accuracies": 0.875, "rewards/generated": -228.43890380859375, "rewards/margins": 215.82196044921875, "rewards/real": -12.616944313049316, "step": 115 }, { "epoch": 0.49, "grad_norm": 75.96934131814294, "learning_rate": 4.725783475783476e-07, "logits/generated": -1.9424694776535034, "logits/oppo_generated": -2.8780970573425293, "logits/oppo_real": -2.880333185195923, "logits/real": -2.094572067260742, "logps/generated": -384.4183654785156, "logps/oppo_gen": -83.72149658203125, "logps/oppo_real": -272.17291259765625, "logps/real": -253.05941772460938, "loss": 0.3365, "loss/gen": 0.0, "loss/real": 0.005366437137126923, "rewards/accuracies": 1.0, "rewards/generated": -300.6968688964844, "rewards/margins": 319.81036376953125, "rewards/real": 19.113483428955078, "step": 116 }, { "epoch": 0.49, "grad_norm": 68.25540843955152, "learning_rate": 4.722222222222222e-07, "logits/generated": -1.8966500759124756, "logits/oppo_generated": -2.8689210414886475, "logits/oppo_real": -3.036574602127075, "logits/real": -2.050349473953247, "logps/generated": -328.08782958984375, "logps/oppo_gen": -61.806739807128906, "logps/oppo_real": -213.864013671875, "logps/real": -201.69223022460938, "loss": 0.3216, "loss/gen": 0.027267932891845703, "loss/real": 0.0526232048869133, "rewards/accuracies": 1.0, "rewards/generated": -266.2810974121094, "rewards/margins": 278.45294189453125, "rewards/real": 12.171795845031738, "step": 117 }, { "epoch": 0.49, "grad_norm": 50.70511841227792, "learning_rate": 4.7186609686609683e-07, "logits/generated": -1.9891793727874756, "logits/oppo_generated": -2.847443103790283, "logits/oppo_real": -2.9110074043273926, "logits/real": -2.0313127040863037, "logps/generated": -321.2769775390625, "logps/oppo_gen": -68.70259857177734, "logps/oppo_real": -252.70947265625, "logps/real": -237.62449645996094, "loss": 0.2511, "loss/gen": 0.01610538363456726, "loss/real": 0.042245976626873016, "rewards/accuracies": 1.0, "rewards/generated": -252.5743865966797, "rewards/margins": 267.6593322753906, "rewards/real": 15.084959983825684, "step": 118 }, { "epoch": 0.5, "grad_norm": 30.791028943166324, "learning_rate": 4.715099715099715e-07, "logits/generated": -2.0675950050354004, "logits/oppo_generated": -2.850525140762329, "logits/oppo_real": -2.9623799324035645, "logits/real": -2.093207359313965, "logps/generated": -307.7158203125, "logps/oppo_gen": -70.65492248535156, "logps/oppo_real": -241.07968139648438, "logps/real": -263.4491271972656, "loss": 0.2583, "loss/gen": 0.007858753204345703, "loss/real": 0.329021155834198, "rewards/accuracies": 0.875, "rewards/generated": -237.0609130859375, "rewards/margins": 214.69146728515625, "rewards/real": -22.36945152282715, "step": 119 }, { "epoch": 0.5, "grad_norm": 50.498172538097855, "learning_rate": 4.711538461538461e-07, "logits/generated": -1.8775372505187988, "logits/oppo_generated": -2.760641574859619, "logits/oppo_real": -2.835960865020752, "logits/real": -2.0484185218811035, "logps/generated": -296.631591796875, "logps/oppo_gen": -77.80702209472656, "logps/oppo_real": -309.97265625, "logps/real": -322.2628173828125, "loss": 0.2541, "loss/gen": 0.1177206039428711, "loss/real": 0.30542153120040894, "rewards/accuracies": 1.0, "rewards/generated": -218.82455444335938, "rewards/margins": 206.5343475341797, "rewards/real": -12.290206909179688, "step": 120 }, { "epoch": 0.51, "grad_norm": 43.8276807575525, "learning_rate": 4.707977207977208e-07, "logits/generated": -1.9596431255340576, "logits/oppo_generated": -2.762300491333008, "logits/oppo_real": -2.91391658782959, "logits/real": -2.064877986907959, "logps/generated": -285.3265075683594, "logps/oppo_gen": -79.30331420898438, "logps/oppo_real": -206.95407104492188, "logps/real": -199.25717163085938, "loss": 0.2071, "loss/gen": 0.15041278302669525, "loss/real": 0.06987226009368896, "rewards/accuracies": 1.0, "rewards/generated": -206.023193359375, "rewards/margins": 213.7200927734375, "rewards/real": 7.696885108947754, "step": 121 }, { "epoch": 0.51, "grad_norm": 92.10083850391916, "learning_rate": 4.7044159544159537e-07, "logits/generated": -1.6970422267913818, "logits/oppo_generated": -2.8723740577697754, "logits/oppo_real": -2.730229139328003, "logits/real": -2.0857439041137695, "logps/generated": -367.9686584472656, "logps/oppo_gen": -68.4917984008789, "logps/oppo_real": -205.74790954589844, "logps/real": -213.32887268066406, "loss": 0.2593, "loss/gen": 0.01937010884284973, "loss/real": 0.20608964562416077, "rewards/accuracies": 1.0, "rewards/generated": -299.4768371582031, "rewards/margins": 291.8958740234375, "rewards/real": -7.580964088439941, "step": 122 }, { "epoch": 0.51, "grad_norm": 78.7925604237164, "learning_rate": 4.7008547008547005e-07, "logits/generated": -2.007908344268799, "logits/oppo_generated": -2.833265781402588, "logits/oppo_real": -2.8581643104553223, "logits/real": -2.1557552814483643, "logps/generated": -311.3591613769531, "logps/oppo_gen": -72.44357299804688, "logps/oppo_real": -294.85699462890625, "logps/real": -279.35943603515625, "loss": 0.2969, "loss/gen": 0.0, "loss/real": 0.0311884805560112, "rewards/accuracies": 1.0, "rewards/generated": -238.91558837890625, "rewards/margins": 254.41310119628906, "rewards/real": 15.497525215148926, "step": 123 }, { "epoch": 0.52, "grad_norm": 68.66656869430713, "learning_rate": 4.697293447293447e-07, "logits/generated": -1.8297350406646729, "logits/oppo_generated": -2.8131227493286133, "logits/oppo_real": -2.815453052520752, "logits/real": -2.1473500728607178, "logps/generated": -365.8243408203125, "logps/oppo_gen": -118.46414184570312, "logps/oppo_real": -350.6376953125, "logps/real": -340.7218933105469, "loss": 0.2489, "loss/gen": 0.0096682608127594, "loss/real": 0.037821196019649506, "rewards/accuracies": 1.0, "rewards/generated": -247.36019897460938, "rewards/margins": 257.2760009765625, "rewards/real": 9.915809631347656, "step": 124 }, { "epoch": 0.52, "grad_norm": 41.412493633036036, "learning_rate": 4.6937321937321934e-07, "logits/generated": -1.9806835651397705, "logits/oppo_generated": -2.868478775024414, "logits/oppo_real": -2.87443208694458, "logits/real": -2.1723623275756836, "logps/generated": -303.59033203125, "logps/oppo_gen": -72.4801025390625, "logps/oppo_real": -315.2503356933594, "logps/real": -303.8700256347656, "loss": 0.2741, "loss/gen": 0.0, "loss/real": 0.03125518560409546, "rewards/accuracies": 1.0, "rewards/generated": -231.11021423339844, "rewards/margins": 242.4905242919922, "rewards/real": 11.380315780639648, "step": 125 }, { "epoch": 0.53, "grad_norm": 70.06525975831389, "learning_rate": 4.69017094017094e-07, "logits/generated": -1.5762176513671875, "logits/oppo_generated": -2.5010550022125244, "logits/oppo_real": -2.635188102722168, "logits/real": -1.7362252473831177, "logps/generated": -365.1429443359375, "logps/oppo_gen": -80.23007202148438, "logps/oppo_real": -347.019287109375, "logps/real": -397.1420593261719, "loss": 0.1983, "loss/gen": 0.0, "loss/real": 0.6604471206665039, "rewards/accuracies": 1.0, "rewards/generated": -284.9128723144531, "rewards/margins": 234.7901153564453, "rewards/real": -50.12276840209961, "step": 126 }, { "epoch": 0.53, "grad_norm": 55.3230546956635, "learning_rate": 4.6866096866096864e-07, "logits/generated": -1.6325474977493286, "logits/oppo_generated": -2.6126623153686523, "logits/oppo_real": -2.6145567893981934, "logits/real": -1.8411592245101929, "logps/generated": -336.42913818359375, "logps/oppo_gen": -73.5291748046875, "logps/oppo_real": -317.5265808105469, "logps/real": -295.2335205078125, "loss": 0.2608, "loss/gen": 0.0, "loss/real": 0.005345538258552551, "rewards/accuracies": 1.0, "rewards/generated": -262.89996337890625, "rewards/margins": 285.1929931640625, "rewards/real": 22.293060302734375, "step": 127 }, { "epoch": 0.54, "grad_norm": 45.401364908412994, "learning_rate": 4.6830484330484326e-07, "logits/generated": -2.1946914196014404, "logits/oppo_generated": -3.0297465324401855, "logits/oppo_real": -3.101362705230713, "logits/real": -2.43679141998291, "logps/generated": -350.4021911621094, "logps/oppo_gen": -120.2161865234375, "logps/oppo_real": -532.0965576171875, "logps/real": -521.9387817382812, "loss": 0.3068, "loss/gen": 0.011398926377296448, "loss/real": 0.09254828840494156, "rewards/accuracies": 1.0, "rewards/generated": -230.1859893798828, "rewards/margins": 240.34373474121094, "rewards/real": 10.15774154663086, "step": 128 }, { "epoch": 0.54, "grad_norm": 78.93660616576285, "learning_rate": 4.6794871794871794e-07, "logits/generated": -1.9267959594726562, "logits/oppo_generated": -2.4462087154388428, "logits/oppo_real": -2.882254123687744, "logits/real": -1.6094073057174683, "logps/generated": -312.116943359375, "logps/oppo_gen": -74.71348571777344, "logps/oppo_real": -324.086669921875, "logps/real": -350.8844909667969, "loss": 0.2236, "loss/gen": 0.0, "loss/real": 0.3186296820640564, "rewards/accuracies": 1.0, "rewards/generated": -237.4034423828125, "rewards/margins": 210.60562133789062, "rewards/real": -26.79781723022461, "step": 129 }, { "epoch": 0.54, "grad_norm": 45.77649026025122, "learning_rate": 4.675925925925926e-07, "logits/generated": -2.0611488819122314, "logits/oppo_generated": -2.9427778720855713, "logits/oppo_real": -2.9869794845581055, "logits/real": -2.2055954933166504, "logps/generated": -256.5621337890625, "logps/oppo_gen": -57.98387908935547, "logps/oppo_real": -299.8202209472656, "logps/real": -324.31402587890625, "loss": 0.2659, "loss/gen": 0.2458263337612152, "loss/real": 0.35364583134651184, "rewards/accuracies": 1.0, "rewards/generated": -198.57826232910156, "rewards/margins": 174.08447265625, "rewards/real": -24.493789672851562, "step": 130 }, { "epoch": 0.55, "grad_norm": 129.35351572821668, "learning_rate": 4.672364672364672e-07, "logits/generated": -1.9105026721954346, "logits/oppo_generated": -2.462200880050659, "logits/oppo_real": -2.7382378578186035, "logits/real": -1.603495478630066, "logps/generated": -283.0691223144531, "logps/oppo_gen": -109.31198120117188, "logps/oppo_real": -333.22021484375, "logps/real": -322.46685791015625, "loss": 0.291, "loss/gen": 0.48477140069007874, "loss/real": 0.011933863162994385, "rewards/accuracies": 0.875, "rewards/generated": -173.7571258544922, "rewards/margins": 184.51043701171875, "rewards/real": 10.753315925598145, "step": 131 }, { "epoch": 0.55, "grad_norm": 35.79489027671233, "learning_rate": 4.6688034188034186e-07, "logits/generated": -2.0710315704345703, "logits/oppo_generated": -2.9814329147338867, "logits/oppo_real": -2.8366198539733887, "logits/real": -2.283444404602051, "logps/generated": -375.65386962890625, "logps/oppo_gen": -117.97686767578125, "logps/oppo_real": -333.4208679199219, "logps/real": -320.87835693359375, "loss": 0.3409, "loss/gen": 0.10222794115543365, "loss/real": 0.14502938091754913, "rewards/accuracies": 1.0, "rewards/generated": -257.677001953125, "rewards/margins": 270.2195129394531, "rewards/real": 12.542512893676758, "step": 132 }, { "epoch": 0.56, "grad_norm": 38.12187487191573, "learning_rate": 4.6652421652421653e-07, "logits/generated": -1.6588772535324097, "logits/oppo_generated": -2.6781723499298096, "logits/oppo_real": -2.516916513442993, "logits/real": -1.9674652814865112, "logps/generated": -311.58111572265625, "logps/oppo_gen": -60.19814682006836, "logps/oppo_real": -262.58551025390625, "logps/real": -258.6761474609375, "loss": 0.2715, "loss/gen": 0.24061748385429382, "loss/real": 0.12360702455043793, "rewards/accuracies": 1.0, "rewards/generated": -251.3829803466797, "rewards/margins": 255.29237365722656, "rewards/real": 3.9093809127807617, "step": 133 }, { "epoch": 0.56, "grad_norm": 42.19599162224269, "learning_rate": 4.6616809116809116e-07, "logits/generated": -1.791245698928833, "logits/oppo_generated": -2.8787498474121094, "logits/oppo_real": -2.805894374847412, "logits/real": -2.2519092559814453, "logps/generated": -353.5828552246094, "logps/oppo_gen": -124.28936767578125, "logps/oppo_real": -606.1627807617188, "logps/real": -580.4054565429688, "loss": 0.2072, "loss/gen": 0.14101070165634155, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.29348754882812, "rewards/margins": 255.05081176757812, "rewards/real": 25.75733184814453, "step": 134 }, { "epoch": 0.56, "grad_norm": 26.621898201803482, "learning_rate": 4.658119658119658e-07, "logits/generated": -2.115086078643799, "logits/oppo_generated": -2.765538454055786, "logits/oppo_real": -2.839543342590332, "logits/real": -2.0567030906677246, "logps/generated": -295.31121826171875, "logps/oppo_gen": -83.72669982910156, "logps/oppo_real": -361.6756591796875, "logps/real": -362.77685546875, "loss": 0.2186, "loss/gen": 0.22105728089809418, "loss/real": 0.2065998762845993, "rewards/accuracies": 1.0, "rewards/generated": -211.5845184326172, "rewards/margins": 210.4833526611328, "rewards/real": -1.1011724472045898, "step": 135 }, { "epoch": 0.57, "grad_norm": 18.935126127513485, "learning_rate": 4.654558404558404e-07, "logits/generated": -2.0748369693756104, "logits/oppo_generated": -2.7416014671325684, "logits/oppo_real": -2.8941569328308105, "logits/real": -1.9765853881835938, "logps/generated": -263.2115478515625, "logps/oppo_gen": -51.659912109375, "logps/oppo_real": -267.5926513671875, "logps/real": -248.9636688232422, "loss": 0.2529, "loss/gen": 0.011083722114562988, "loss/real": 0.0029998421669006348, "rewards/accuracies": 1.0, "rewards/generated": -211.55162048339844, "rewards/margins": 230.18063354492188, "rewards/real": 18.62899398803711, "step": 136 }, { "epoch": 0.57, "grad_norm": 99.21669289206076, "learning_rate": 4.650997150997151e-07, "logits/generated": -1.808034896850586, "logits/oppo_generated": -2.609920024871826, "logits/oppo_real": -2.5399818420410156, "logits/real": -1.9245736598968506, "logps/generated": -339.20257568359375, "logps/oppo_gen": -81.96345520019531, "logps/oppo_real": -258.99554443359375, "logps/real": -247.95123291015625, "loss": 0.3165, "loss/gen": 0.0, "loss/real": 0.023126445710659027, "rewards/accuracies": 1.0, "rewards/generated": -257.2391357421875, "rewards/margins": 268.28338623046875, "rewards/real": 11.044289588928223, "step": 137 }, { "epoch": 0.58, "grad_norm": 39.08625683834169, "learning_rate": 4.6474358974358975e-07, "logits/generated": -2.0943126678466797, "logits/oppo_generated": -2.89731502532959, "logits/oppo_real": -2.861166000366211, "logits/real": -2.182774543762207, "logps/generated": -280.51513671875, "logps/oppo_gen": -61.10588073730469, "logps/oppo_real": -297.8720703125, "logps/real": -302.93011474609375, "loss": 0.2691, "loss/gen": 0.0, "loss/real": 0.24881529808044434, "rewards/accuracies": 1.0, "rewards/generated": -219.40927124023438, "rewards/margins": 214.35122680664062, "rewards/real": -5.058034896850586, "step": 138 }, { "epoch": 0.58, "grad_norm": 36.347380649479064, "learning_rate": 4.643874643874643e-07, "logits/generated": -1.9154211282730103, "logits/oppo_generated": -2.8648695945739746, "logits/oppo_real": -2.711393356323242, "logits/real": -2.319422960281372, "logps/generated": -334.3785400390625, "logps/oppo_gen": -111.59371948242188, "logps/oppo_real": -521.255859375, "logps/real": -498.578857421875, "loss": 0.2889, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -222.78482055664062, "rewards/margins": 245.46185302734375, "rewards/real": 22.677024841308594, "step": 139 }, { "epoch": 0.59, "grad_norm": 32.38887627937445, "learning_rate": 4.64031339031339e-07, "logits/generated": -2.1019668579101562, "logits/oppo_generated": -2.8064088821411133, "logits/oppo_real": -2.845989227294922, "logits/real": -2.034879446029663, "logps/generated": -286.20269775390625, "logps/oppo_gen": -52.78784942626953, "logps/oppo_real": -172.55088806152344, "logps/real": -191.98782348632812, "loss": 0.2025, "loss/gen": 0.0, "loss/real": 0.3528357446193695, "rewards/accuracies": 0.875, "rewards/generated": -233.41485595703125, "rewards/margins": 213.9779052734375, "rewards/real": -19.43694305419922, "step": 140 }, { "epoch": 0.59, "grad_norm": 40.25526611755675, "learning_rate": 4.6367521367521367e-07, "logits/generated": -2.0735549926757812, "logits/oppo_generated": -3.0264251232147217, "logits/oppo_real": -2.836057186126709, "logits/real": -2.4234282970428467, "logps/generated": -314.87933349609375, "logps/oppo_gen": -74.337158203125, "logps/oppo_real": -371.032470703125, "logps/real": -335.6429138183594, "loss": 0.1941, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -240.5421905517578, "rewards/margins": 275.9317321777344, "rewards/real": 35.38955307006836, "step": 141 }, { "epoch": 0.59, "grad_norm": 63.756242035094104, "learning_rate": 4.633190883190883e-07, "logits/generated": -2.2163453102111816, "logits/oppo_generated": -2.876476764678955, "logits/oppo_real": -2.912707805633545, "logits/real": -2.1965935230255127, "logps/generated": -285.357666015625, "logps/oppo_gen": -90.53692626953125, "logps/oppo_real": -383.74615478515625, "logps/real": -361.18463134765625, "loss": 0.2526, "loss/gen": 0.1636572778224945, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -194.82073974609375, "rewards/margins": 217.3822784423828, "rewards/real": 22.561546325683594, "step": 142 }, { "epoch": 0.6, "grad_norm": 40.72590533351438, "learning_rate": 4.6296296296296297e-07, "logits/generated": -2.3514795303344727, "logits/oppo_generated": -2.9819746017456055, "logits/oppo_real": -3.1959123611450195, "logits/real": -2.3554928302764893, "logps/generated": -384.0709228515625, "logps/oppo_gen": -152.70217895507812, "logps/oppo_real": -483.54266357421875, "logps/real": -457.64434814453125, "loss": 0.2946, "loss/gen": 0.06552544236183167, "loss/real": 0.027411267161369324, "rewards/accuracies": 1.0, "rewards/generated": -231.36874389648438, "rewards/margins": 257.2670593261719, "rewards/real": 25.898303985595703, "step": 143 }, { "epoch": 0.6, "grad_norm": 43.21577996166808, "learning_rate": 4.626068376068376e-07, "logits/generated": -2.227564811706543, "logits/oppo_generated": -2.7378830909729004, "logits/oppo_real": -3.110536813735962, "logits/real": -2.0705103874206543, "logps/generated": -332.7789306640625, "logps/oppo_gen": -86.0918960571289, "logps/oppo_real": -447.7939147949219, "logps/real": -454.8052978515625, "loss": 0.3252, "loss/gen": 0.0, "loss/real": 0.17898190021514893, "rewards/accuracies": 1.0, "rewards/generated": -246.68701171875, "rewards/margins": 239.67562866210938, "rewards/real": -7.011386871337891, "step": 144 }, { "epoch": 0.61, "grad_norm": 49.99822018316627, "learning_rate": 4.622507122507122e-07, "logits/generated": -2.2606253623962402, "logits/oppo_generated": -2.7491419315338135, "logits/oppo_real": -3.191051483154297, "logits/real": -1.9832546710968018, "logps/generated": -342.39007568359375, "logps/oppo_gen": -96.26548767089844, "logps/oppo_real": -305.7531433105469, "logps/real": -301.7978820800781, "loss": 0.2736, "loss/gen": 0.2276860624551773, "loss/real": 0.10041482746601105, "rewards/accuracies": 1.0, "rewards/generated": -246.1245880126953, "rewards/margins": 250.07986450195312, "rewards/real": 3.9552855491638184, "step": 145 }, { "epoch": 0.61, "grad_norm": 55.35757344302584, "learning_rate": 4.618945868945869e-07, "logits/generated": -1.8759946823120117, "logits/oppo_generated": -2.8662476539611816, "logits/oppo_real": -2.7619881629943848, "logits/real": -2.2025985717773438, "logps/generated": -321.98046875, "logps/oppo_gen": -76.39656066894531, "logps/oppo_real": -342.36138916015625, "logps/real": -322.8648681640625, "loss": 0.302, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -245.58392333984375, "rewards/margins": 265.0804748535156, "rewards/real": 19.496538162231445, "step": 146 }, { "epoch": 0.62, "grad_norm": 28.60285672945864, "learning_rate": 4.6153846153846156e-07, "logits/generated": -2.0779922008514404, "logits/oppo_generated": -2.973456859588623, "logits/oppo_real": -2.9541869163513184, "logits/real": -2.259559154510498, "logps/generated": -264.32720947265625, "logps/oppo_gen": -58.52758026123047, "logps/oppo_real": -196.6337127685547, "logps/real": -206.65615844726562, "loss": 0.2171, "loss/gen": 0.16810482740402222, "loss/real": 0.20324894785881042, "rewards/accuracies": 1.0, "rewards/generated": -205.79962158203125, "rewards/margins": 195.77719116210938, "rewards/real": -10.022433280944824, "step": 147 }, { "epoch": 0.62, "grad_norm": 34.86539756356759, "learning_rate": 4.6118233618233613e-07, "logits/generated": -2.0752716064453125, "logits/oppo_generated": -2.9579458236694336, "logits/oppo_real": -2.8345115184783936, "logits/real": -2.342416763305664, "logps/generated": -335.4797668457031, "logps/oppo_gen": -86.37559509277344, "logps/oppo_real": -329.4002685546875, "logps/real": -338.41485595703125, "loss": 0.2448, "loss/gen": 0.02429106831550598, "loss/real": 0.24715474247932434, "rewards/accuracies": 1.0, "rewards/generated": -249.10418701171875, "rewards/margins": 240.08961486816406, "rewards/real": -9.01455020904541, "step": 148 }, { "epoch": 0.62, "grad_norm": 38.988763217452316, "learning_rate": 4.608262108262108e-07, "logits/generated": -1.6155552864074707, "logits/oppo_generated": -2.4297678470611572, "logits/oppo_real": -2.5349526405334473, "logits/real": -1.6601029634475708, "logps/generated": -383.1607666015625, "logps/oppo_gen": -139.25880432128906, "logps/oppo_real": -366.9024658203125, "logps/real": -382.0296630859375, "loss": 0.2172, "loss/gen": 0.0033193975687026978, "loss/real": 0.2846258878707886, "rewards/accuracies": 0.875, "rewards/generated": -243.90196228027344, "rewards/margins": 228.77481079101562, "rewards/real": -15.12716293334961, "step": 149 }, { "epoch": 0.63, "grad_norm": 48.59085099873484, "learning_rate": 4.6047008547008543e-07, "logits/generated": -1.6524815559387207, "logits/oppo_generated": -2.59027099609375, "logits/oppo_real": -2.5751681327819824, "logits/real": -1.8144464492797852, "logps/generated": -302.7337646484375, "logps/oppo_gen": -44.13750076293945, "logps/oppo_real": -126.39328002929688, "logps/real": -168.95840454101562, "loss": 0.2802, "loss/gen": 0.0, "loss/real": 0.5524503588676453, "rewards/accuracies": 1.0, "rewards/generated": -258.5962829589844, "rewards/margins": 216.03115844726562, "rewards/real": -42.56513214111328, "step": 150 }, { "epoch": 0.63, "grad_norm": 30.33221914180376, "learning_rate": 4.601139601139601e-07, "logits/generated": -1.917588710784912, "logits/oppo_generated": -2.8061888217926025, "logits/oppo_real": -2.885352611541748, "logits/real": -2.1351234912872314, "logps/generated": -359.21435546875, "logps/oppo_gen": -82.9956283569336, "logps/oppo_real": -287.7582702636719, "logps/real": -278.5579833984375, "loss": 0.1493, "loss/gen": 0.0, "loss/real": 0.013260193169116974, "rewards/accuracies": 1.0, "rewards/generated": -276.21868896484375, "rewards/margins": 285.4189453125, "rewards/real": 9.200270652770996, "step": 151 }, { "epoch": 0.64, "grad_norm": 41.78845855681551, "learning_rate": 4.5975783475783473e-07, "logits/generated": -1.7056605815887451, "logits/oppo_generated": -2.6804826259613037, "logits/oppo_real": -2.560675621032715, "logits/real": -2.0222690105438232, "logps/generated": -343.30615234375, "logps/oppo_gen": -125.20469665527344, "logps/oppo_real": -214.75454711914062, "logps/real": -228.00494384765625, "loss": 0.2917, "loss/gen": 0.05971069633960724, "loss/real": 0.2809779942035675, "rewards/accuracies": 0.875, "rewards/generated": -218.10145568847656, "rewards/margins": 204.85104370117188, "rewards/real": -13.250406265258789, "step": 152 }, { "epoch": 0.64, "grad_norm": 40.09102040347923, "learning_rate": 4.5940170940170935e-07, "logits/generated": -1.9970121383666992, "logits/oppo_generated": -2.8161306381225586, "logits/oppo_real": -2.873737096786499, "logits/real": -2.104854106903076, "logps/generated": -301.53515625, "logps/oppo_gen": -39.4675178527832, "logps/oppo_real": -94.7720718383789, "logps/real": -111.1379165649414, "loss": 0.2653, "loss/gen": 0.4245451092720032, "loss/real": 0.26902616024017334, "rewards/accuracies": 0.875, "rewards/generated": -262.0676574707031, "rewards/margins": 245.70181274414062, "rewards/real": -16.365846633911133, "step": 153 }, { "epoch": 0.64, "grad_norm": 46.05510297204453, "learning_rate": 4.59045584045584e-07, "logits/generated": -1.9282357692718506, "logits/oppo_generated": -2.754338026046753, "logits/oppo_real": -2.6611428260803223, "logits/real": -2.1131625175476074, "logps/generated": -287.21087646484375, "logps/oppo_gen": -53.64311981201172, "logps/oppo_real": -189.60964965820312, "logps/real": -202.54542541503906, "loss": 0.2588, "loss/gen": 0.0, "loss/real": 0.2690971791744232, "rewards/accuracies": 1.0, "rewards/generated": -233.5677490234375, "rewards/margins": 220.6319580078125, "rewards/real": -12.93579387664795, "step": 154 }, { "epoch": 0.65, "grad_norm": 24.115036245971748, "learning_rate": 4.586894586894587e-07, "logits/generated": -2.1603193283081055, "logits/oppo_generated": -2.8700437545776367, "logits/oppo_real": -3.012883186340332, "logits/real": -2.2183475494384766, "logps/generated": -321.8924560546875, "logps/oppo_gen": -64.43563842773438, "logps/oppo_real": -366.68572998046875, "logps/real": -341.61822509765625, "loss": 0.185, "loss/gen": 0.0, "loss/real": 0.0027311518788337708, "rewards/accuracies": 1.0, "rewards/generated": -257.45684814453125, "rewards/margins": 282.52435302734375, "rewards/real": 25.067520141601562, "step": 155 }, { "epoch": 0.65, "grad_norm": 44.438830907021085, "learning_rate": 4.5833333333333327e-07, "logits/generated": -1.8886809349060059, "logits/oppo_generated": -2.896176338195801, "logits/oppo_real": -2.7520911693573, "logits/real": -2.2607998847961426, "logps/generated": -359.7982177734375, "logps/oppo_gen": -94.6259765625, "logps/oppo_real": -329.9571533203125, "logps/real": -318.4111633300781, "loss": 0.2217, "loss/gen": 0.0, "loss/real": 0.06740894168615341, "rewards/accuracies": 1.0, "rewards/generated": -265.1722717285156, "rewards/margins": 276.71826171875, "rewards/real": 11.545991897583008, "step": 156 }, { "epoch": 0.66, "grad_norm": 36.220556469186114, "learning_rate": 4.5797720797720794e-07, "logits/generated": -2.0319433212280273, "logits/oppo_generated": -2.72526478767395, "logits/oppo_real": -2.760162591934204, "logits/real": -2.1123125553131104, "logps/generated": -314.482421875, "logps/oppo_gen": -70.71673583984375, "logps/oppo_real": -391.76458740234375, "logps/real": -421.2603454589844, "loss": 0.1594, "loss/gen": 0.0111636221408844, "loss/real": 0.3985538184642792, "rewards/accuracies": 1.0, "rewards/generated": -243.76568603515625, "rewards/margins": 214.2699737548828, "rewards/real": -29.495723724365234, "step": 157 }, { "epoch": 0.66, "grad_norm": 63.09100647204996, "learning_rate": 4.576210826210826e-07, "logits/generated": -2.4284703731536865, "logits/oppo_generated": -2.979785919189453, "logits/oppo_real": -3.2641677856445312, "logits/real": -2.3469204902648926, "logps/generated": -347.482421875, "logps/oppo_gen": -92.89317321777344, "logps/oppo_real": -330.3245849609375, "logps/real": -342.6152038574219, "loss": 0.2098, "loss/gen": 0.051213398575782776, "loss/real": 0.2405387908220291, "rewards/accuracies": 0.875, "rewards/generated": -254.58921813964844, "rewards/margins": 242.298583984375, "rewards/real": -12.290639877319336, "step": 158 }, { "epoch": 0.67, "grad_norm": 61.13583908242775, "learning_rate": 4.5726495726495724e-07, "logits/generated": -1.760241985321045, "logits/oppo_generated": -2.775574207305908, "logits/oppo_real": -2.598371744155884, "logits/real": -2.1788487434387207, "logps/generated": -302.0619201660156, "logps/oppo_gen": -65.71693420410156, "logps/oppo_real": -220.19737243652344, "logps/real": -206.774658203125, "loss": 0.1849, "loss/gen": 0.0, "loss/real": 0.006878167390823364, "rewards/accuracies": 1.0, "rewards/generated": -236.34498596191406, "rewards/margins": 249.7677001953125, "rewards/real": 13.422710418701172, "step": 159 }, { "epoch": 0.67, "grad_norm": 30.892653140615913, "learning_rate": 4.569088319088319e-07, "logits/generated": -1.6592109203338623, "logits/oppo_generated": -2.6892812252044678, "logits/oppo_real": -2.527797222137451, "logits/real": -2.02337646484375, "logps/generated": -255.10365295410156, "logps/oppo_gen": -56.507102966308594, "logps/oppo_real": -203.99942016601562, "logps/real": -214.63226318359375, "loss": 0.2654, "loss/gen": 0.4655250906944275, "loss/real": 0.16440606117248535, "rewards/accuracies": 0.875, "rewards/generated": -198.5965576171875, "rewards/margins": 187.96371459960938, "rewards/real": -10.632861137390137, "step": 160 }, { "epoch": 0.67, "grad_norm": 29.88231515108999, "learning_rate": 4.5655270655270654e-07, "logits/generated": -2.1158018112182617, "logits/oppo_generated": -2.892515182495117, "logits/oppo_real": -2.87583589553833, "logits/real": -2.2688913345336914, "logps/generated": -284.7113342285156, "logps/oppo_gen": -70.63409423828125, "logps/oppo_real": -236.45480346679688, "logps/real": -220.61276245117188, "loss": 0.2059, "loss/gen": 0.16342338919639587, "loss/real": 0.02121652662754059, "rewards/accuracies": 1.0, "rewards/generated": -214.07723999023438, "rewards/margins": 229.91929626464844, "rewards/real": 15.842063903808594, "step": 161 }, { "epoch": 0.68, "grad_norm": 38.31896420283467, "learning_rate": 4.5619658119658116e-07, "logits/generated": -1.7529704570770264, "logits/oppo_generated": -2.2372124195098877, "logits/oppo_real": -2.6531500816345215, "logits/real": -1.4958126544952393, "logps/generated": -241.34754943847656, "logps/oppo_gen": -49.9699821472168, "logps/oppo_real": -257.7629699707031, "logps/real": -258.93359375, "loss": 0.2438, "loss/gen": 0.5050817728042603, "loss/real": 0.21669313311576843, "rewards/accuracies": 1.0, "rewards/generated": -191.3775634765625, "rewards/margins": 190.20692443847656, "rewards/real": -1.170628547668457, "step": 162 }, { "epoch": 0.68, "grad_norm": 59.104889302435254, "learning_rate": 4.5584045584045584e-07, "logits/generated": -1.8979967832565308, "logits/oppo_generated": -2.6594979763031006, "logits/oppo_real": -2.72336483001709, "logits/real": -1.9622243642807007, "logps/generated": -281.2121887207031, "logps/oppo_gen": -69.47285461425781, "logps/oppo_real": -203.925048828125, "logps/real": -180.85427856445312, "loss": 0.2324, "loss/gen": 0.22617992758750916, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -211.7393341064453, "rewards/margins": 234.8101043701172, "rewards/real": 23.070770263671875, "step": 163 }, { "epoch": 0.69, "grad_norm": 53.67125863911009, "learning_rate": 4.5548433048433046e-07, "logits/generated": -2.1446683406829834, "logits/oppo_generated": -2.84741473197937, "logits/oppo_real": -2.9322423934936523, "logits/real": -2.304154396057129, "logps/generated": -333.1722106933594, "logps/oppo_gen": -72.28129577636719, "logps/oppo_real": -342.0706787109375, "logps/real": -361.2486572265625, "loss": 0.2267, "loss/gen": 0.0, "loss/real": 0.2817186713218689, "rewards/accuracies": 1.0, "rewards/generated": -260.8908996582031, "rewards/margins": 241.7129364013672, "rewards/real": -19.177961349487305, "step": 164 }, { "epoch": 0.69, "grad_norm": 41.20307611629637, "learning_rate": 4.551282051282051e-07, "logits/generated": -2.2117128372192383, "logits/oppo_generated": -2.8123486042022705, "logits/oppo_real": -2.9484448432922363, "logits/real": -2.303635597229004, "logps/generated": -301.5762634277344, "logps/oppo_gen": -78.67784118652344, "logps/oppo_real": -224.94638061523438, "logps/real": -219.7537841796875, "loss": 0.2616, "loss/gen": 0.048871323466300964, "loss/real": 0.12874145805835724, "rewards/accuracies": 1.0, "rewards/generated": -222.8984375, "rewards/margins": 228.09103393554688, "rewards/real": 5.192612648010254, "step": 165 }, { "epoch": 0.69, "grad_norm": 64.1642332466071, "learning_rate": 4.5477207977207976e-07, "logits/generated": -2.0593137741088867, "logits/oppo_generated": -2.6430654525756836, "logits/oppo_real": -2.7417783737182617, "logits/real": -2.0025062561035156, "logps/generated": -283.02716064453125, "logps/oppo_gen": -63.871150970458984, "logps/oppo_real": -224.14703369140625, "logps/real": -193.3045654296875, "loss": 0.1827, "loss/gen": 0.320221483707428, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -219.15602111816406, "rewards/margins": 249.99851989746094, "rewards/real": 30.842487335205078, "step": 166 }, { "epoch": 0.7, "grad_norm": 44.01251874910847, "learning_rate": 4.544159544159544e-07, "logits/generated": -2.1278610229492188, "logits/oppo_generated": -2.757966995239258, "logits/oppo_real": -2.906935691833496, "logits/real": -2.026312828063965, "logps/generated": -245.8863525390625, "logps/oppo_gen": -53.980133056640625, "logps/oppo_real": -168.99293518066406, "logps/real": -175.02117919921875, "loss": 0.1984, "loss/gen": 0.3063175082206726, "loss/real": 0.224711611866951, "rewards/accuracies": 1.0, "rewards/generated": -191.90621948242188, "rewards/margins": 185.87799072265625, "rewards/real": -6.028232574462891, "step": 167 }, { "epoch": 0.7, "grad_norm": 89.25379401512157, "learning_rate": 4.5405982905982905e-07, "logits/generated": -1.7046711444854736, "logits/oppo_generated": -2.34848690032959, "logits/oppo_real": -2.549453020095825, "logits/real": -1.6289647817611694, "logps/generated": -291.48760986328125, "logps/oppo_gen": -41.99907684326172, "logps/oppo_real": -137.05735778808594, "logps/real": -138.64942932128906, "loss": 0.2167, "loss/gen": 0.0, "loss/real": 0.2769484519958496, "rewards/accuracies": 1.0, "rewards/generated": -249.48851013183594, "rewards/margins": 247.8964385986328, "rewards/real": -1.5920724868774414, "step": 168 }, { "epoch": 0.71, "grad_norm": 85.32059828779624, "learning_rate": 4.537037037037037e-07, "logits/generated": -1.9084529876708984, "logits/oppo_generated": -2.5094847679138184, "logits/oppo_real": -2.6891722679138184, "logits/real": -1.900597095489502, "logps/generated": -450.51837158203125, "logps/oppo_gen": -68.40258026123047, "logps/oppo_real": -223.42794799804688, "logps/real": -218.4866943359375, "loss": 0.2427, "loss/gen": 0.2575632333755493, "loss/real": 0.20119953155517578, "rewards/accuracies": 1.0, "rewards/generated": -382.1158142089844, "rewards/margins": 387.05706787109375, "rewards/real": 4.941247940063477, "step": 169 }, { "epoch": 0.71, "grad_norm": 63.92922684439313, "learning_rate": 4.533475783475783e-07, "logits/generated": -1.9882678985595703, "logits/oppo_generated": -2.8935999870300293, "logits/oppo_real": -2.775484561920166, "logits/real": -2.3655059337615967, "logps/generated": -269.10772705078125, "logps/oppo_gen": -50.93283462524414, "logps/oppo_real": -316.0002136230469, "logps/real": -292.96356201171875, "loss": 0.2371, "loss/gen": 0.10934163630008698, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -218.1748809814453, "rewards/margins": 241.2115478515625, "rewards/real": 23.036657333374023, "step": 170 }, { "epoch": 0.72, "grad_norm": 109.75823929778575, "learning_rate": 4.5299145299145297e-07, "logits/generated": -2.2796077728271484, "logits/oppo_generated": -2.8526816368103027, "logits/oppo_real": -3.2386014461517334, "logits/real": -2.2865777015686035, "logps/generated": -342.9628601074219, "logps/oppo_gen": -113.54923248291016, "logps/oppo_real": -351.7125549316406, "logps/real": -339.4488220214844, "loss": 0.2334, "loss/gen": 0.26814204454421997, "loss/real": 0.026034392416477203, "rewards/accuracies": 0.875, "rewards/generated": -229.4136199951172, "rewards/margins": 241.67733764648438, "rewards/real": 12.263713836669922, "step": 171 }, { "epoch": 0.72, "grad_norm": 98.73111963497122, "learning_rate": 4.5263532763532765e-07, "logits/generated": -2.3369998931884766, "logits/oppo_generated": -2.9850940704345703, "logits/oppo_real": -3.0315611362457275, "logits/real": -2.358889102935791, "logps/generated": -314.4837341308594, "logps/oppo_gen": -61.65489196777344, "logps/oppo_real": -151.10653686523438, "logps/real": -144.9267578125, "loss": 0.2096, "loss/gen": 0.0, "loss/real": 0.1058526411652565, "rewards/accuracies": 1.0, "rewards/generated": -252.82882690429688, "rewards/margins": 259.00860595703125, "rewards/real": 6.179767608642578, "step": 172 }, { "epoch": 0.72, "grad_norm": 43.88860577412861, "learning_rate": 4.522792022792022e-07, "logits/generated": -2.205157995223999, "logits/oppo_generated": -2.891350746154785, "logits/oppo_real": -3.0990657806396484, "logits/real": -2.274484634399414, "logps/generated": -485.29437255859375, "logps/oppo_gen": -212.02532958984375, "logps/oppo_real": -549.8078002929688, "logps/real": -530.3729248046875, "loss": 0.2064, "loss/gen": 0.0, "loss/real": 0.04228302091360092, "rewards/accuracies": 1.0, "rewards/generated": -273.2690734863281, "rewards/margins": 292.70391845703125, "rewards/real": 19.434844970703125, "step": 173 }, { "epoch": 0.73, "grad_norm": 31.48113836496238, "learning_rate": 4.519230769230769e-07, "logits/generated": -1.9153656959533691, "logits/oppo_generated": -2.861656904220581, "logits/oppo_real": -2.749734878540039, "logits/real": -2.23591947555542, "logps/generated": -283.47076416015625, "logps/oppo_gen": -52.08341598510742, "logps/oppo_real": -268.2560119628906, "logps/real": -241.11734008789062, "loss": 0.146, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -231.38734436035156, "rewards/margins": 258.5260314941406, "rewards/real": 27.138673782348633, "step": 174 }, { "epoch": 0.73, "grad_norm": 55.337732592124695, "learning_rate": 4.5156695156695157e-07, "logits/generated": -2.0015411376953125, "logits/oppo_generated": -2.8331031799316406, "logits/oppo_real": -2.8462958335876465, "logits/real": -2.170417308807373, "logps/generated": -387.2781677246094, "logps/oppo_gen": -78.92254638671875, "logps/oppo_real": -224.86373901367188, "logps/real": -244.69886779785156, "loss": 0.2824, "loss/gen": 0.0006727427244186401, "loss/real": 0.33926331996917725, "rewards/accuracies": 1.0, "rewards/generated": -308.3556213378906, "rewards/margins": 288.5204772949219, "rewards/real": -19.835121154785156, "step": 175 }, { "epoch": 0.74, "grad_norm": 143.149881843734, "learning_rate": 4.512108262108262e-07, "logits/generated": -2.24777889251709, "logits/oppo_generated": -2.879185199737549, "logits/oppo_real": -2.873112678527832, "logits/real": -2.3650031089782715, "logps/generated": -310.72698974609375, "logps/oppo_gen": -49.27460479736328, "logps/oppo_real": -375.43463134765625, "logps/real": -367.69134521484375, "loss": 0.2742, "loss/gen": 0.003482311964035034, "loss/real": 0.10677148401737213, "rewards/accuracies": 1.0, "rewards/generated": -261.4523620605469, "rewards/margins": 269.1956481933594, "rewards/real": 7.7432966232299805, "step": 176 }, { "epoch": 0.74, "grad_norm": 105.91241867546742, "learning_rate": 4.5085470085470087e-07, "logits/generated": -2.244475841522217, "logits/oppo_generated": -3.0462043285369873, "logits/oppo_real": -3.1089582443237305, "logits/real": -2.443125009536743, "logps/generated": -331.522705078125, "logps/oppo_gen": -77.79332733154297, "logps/oppo_real": -319.2231750488281, "logps/real": -294.63909912109375, "loss": 0.2243, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -253.72940063476562, "rewards/margins": 278.3134765625, "rewards/real": 24.58407974243164, "step": 177 }, { "epoch": 0.74, "grad_norm": 39.86911416934715, "learning_rate": 4.5049857549857543e-07, "logits/generated": -2.201946973800659, "logits/oppo_generated": -2.815687656402588, "logits/oppo_real": -2.9501237869262695, "logits/real": -2.219613552093506, "logps/generated": -352.42193603515625, "logps/oppo_gen": -103.51431274414062, "logps/oppo_real": -308.8333435058594, "logps/real": -304.56707763671875, "loss": 0.2701, "loss/gen": 0.01318824291229248, "loss/real": 0.14666341245174408, "rewards/accuracies": 1.0, "rewards/generated": -248.90762329101562, "rewards/margins": 253.17388916015625, "rewards/real": 4.266262054443359, "step": 178 }, { "epoch": 0.75, "grad_norm": 52.815620285042094, "learning_rate": 4.501424501424501e-07, "logits/generated": -2.0335657596588135, "logits/oppo_generated": -2.779146194458008, "logits/oppo_real": -2.8336267471313477, "logits/real": -2.1648244857788086, "logps/generated": -302.6240234375, "logps/oppo_gen": -72.71639251708984, "logps/oppo_real": -196.57557678222656, "logps/real": -177.44334411621094, "loss": 0.1641, "loss/gen": 0.03493678569793701, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -229.90762329101562, "rewards/margins": 249.03985595703125, "rewards/real": 19.132234573364258, "step": 179 }, { "epoch": 0.75, "grad_norm": 53.27667685652888, "learning_rate": 4.497863247863248e-07, "logits/generated": -2.264275074005127, "logits/oppo_generated": -2.8425636291503906, "logits/oppo_real": -2.9093685150146484, "logits/real": -2.2906460762023926, "logps/generated": -324.7326354980469, "logps/oppo_gen": -95.93893432617188, "logps/oppo_real": -207.11392211914062, "logps/real": -191.6349334716797, "loss": 0.2242, "loss/gen": 0.2739104628562927, "loss/real": 0.014250755310058594, "rewards/accuracies": 1.0, "rewards/generated": -228.793701171875, "rewards/margins": 244.272705078125, "rewards/real": 15.478999137878418, "step": 180 }, { "epoch": 0.76, "grad_norm": 71.11992993242998, "learning_rate": 4.494301994301994e-07, "logits/generated": -2.115266799926758, "logits/oppo_generated": -2.8224010467529297, "logits/oppo_real": -2.778409957885742, "logits/real": -2.3558645248413086, "logps/generated": -344.7413024902344, "logps/oppo_gen": -88.16463470458984, "logps/oppo_real": -239.9169921875, "logps/real": -255.3130645751953, "loss": 0.2213, "loss/gen": 0.029619291424751282, "loss/real": 0.297105997800827, "rewards/accuracies": 1.0, "rewards/generated": -256.57666015625, "rewards/margins": 241.18060302734375, "rewards/real": -15.396068572998047, "step": 181 }, { "epoch": 0.76, "grad_norm": 86.04380073271548, "learning_rate": 4.4907407407407403e-07, "logits/generated": -2.3114571571350098, "logits/oppo_generated": -2.9657952785491943, "logits/oppo_real": -2.9425137042999268, "logits/real": -2.491687059402466, "logps/generated": -314.3035583496094, "logps/oppo_gen": -76.42547607421875, "logps/oppo_real": -261.8043518066406, "logps/real": -245.9702911376953, "loss": 0.3062, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -237.87808227539062, "rewards/margins": 253.712158203125, "rewards/real": 15.834070205688477, "step": 182 }, { "epoch": 0.77, "grad_norm": 106.78727210850083, "learning_rate": 4.487179487179487e-07, "logits/generated": -1.8033708333969116, "logits/oppo_generated": -2.6656646728515625, "logits/oppo_real": -2.512063980102539, "logits/real": -2.0389981269836426, "logps/generated": -216.2008056640625, "logps/oppo_gen": -61.16596603393555, "logps/oppo_real": -89.70797729492188, "logps/real": -71.5185317993164, "loss": 0.1824, "loss/gen": 0.8215519785881042, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -155.03482055664062, "rewards/margins": 173.22427368164062, "rewards/real": 18.189449310302734, "step": 183 }, { "epoch": 0.77, "grad_norm": 101.12620711578676, "learning_rate": 4.4836182336182333e-07, "logits/generated": -1.9014736413955688, "logits/oppo_generated": -2.679591655731201, "logits/oppo_real": -2.5152084827423096, "logits/real": -2.1934709548950195, "logps/generated": -450.9914855957031, "logps/oppo_gen": -134.39280700683594, "logps/oppo_real": -353.8466491699219, "logps/real": -347.7996520996094, "loss": 0.2277, "loss/gen": 0.11133264005184174, "loss/real": 0.03321786969900131, "rewards/accuracies": 1.0, "rewards/generated": -316.59869384765625, "rewards/margins": 322.64569091796875, "rewards/real": 6.047005653381348, "step": 184 }, { "epoch": 0.77, "grad_norm": 35.871571659467506, "learning_rate": 4.48005698005698e-07, "logits/generated": -2.210495710372925, "logits/oppo_generated": -2.8852622509002686, "logits/oppo_real": -2.9888343811035156, "logits/real": -2.316572427749634, "logps/generated": -349.71429443359375, "logps/oppo_gen": -86.57408142089844, "logps/oppo_real": -353.78594970703125, "logps/real": -361.350830078125, "loss": 0.1252, "loss/gen": 0.0, "loss/real": 0.25374865531921387, "rewards/accuracies": 1.0, "rewards/generated": -263.14019775390625, "rewards/margins": 255.5753631591797, "rewards/real": -7.564859867095947, "step": 185 }, { "epoch": 0.78, "grad_norm": 30.56805032519401, "learning_rate": 4.476495726495726e-07, "logits/generated": -2.2187647819519043, "logits/oppo_generated": -2.894904136657715, "logits/oppo_real": -2.8833250999450684, "logits/real": -2.351996421813965, "logps/generated": -333.04010009765625, "logps/oppo_gen": -97.552490234375, "logps/oppo_real": -446.60357666015625, "logps/real": -427.14239501953125, "loss": 0.1785, "loss/gen": 0.018305152654647827, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.4876251220703, "rewards/margins": 254.9488067626953, "rewards/real": 19.46118927001953, "step": 186 }, { "epoch": 0.78, "grad_norm": 65.2351178849916, "learning_rate": 4.4729344729344725e-07, "logits/generated": -2.244483709335327, "logits/oppo_generated": -2.9238195419311523, "logits/oppo_real": -2.928109645843506, "logits/real": -2.411482810974121, "logps/generated": -433.5346984863281, "logps/oppo_gen": -99.34373474121094, "logps/oppo_real": -381.1275634765625, "logps/real": -367.78045654296875, "loss": 0.2426, "loss/gen": 0.0, "loss/real": 0.021128714084625244, "rewards/accuracies": 1.0, "rewards/generated": -334.19097900390625, "rewards/margins": 347.5380859375, "rewards/real": 13.347097396850586, "step": 187 }, { "epoch": 0.79, "grad_norm": 73.61953834262512, "learning_rate": 4.469373219373219e-07, "logits/generated": -1.8134526014328003, "logits/oppo_generated": -2.7080626487731934, "logits/oppo_real": -2.5767087936401367, "logits/real": -2.030604124069214, "logps/generated": -408.92938232421875, "logps/oppo_gen": -46.502037048339844, "logps/oppo_real": -149.05059814453125, "logps/real": -196.5388641357422, "loss": 0.2718, "loss/gen": 0.0, "loss/real": 0.5971862077713013, "rewards/accuracies": 1.0, "rewards/generated": -362.4273376464844, "rewards/margins": 314.9390869140625, "rewards/real": -47.488250732421875, "step": 188 }, { "epoch": 0.79, "grad_norm": 37.51273384059488, "learning_rate": 4.465811965811966e-07, "logits/generated": -2.0432450771331787, "logits/oppo_generated": -2.9217922687530518, "logits/oppo_real": -3.0358145236968994, "logits/real": -2.355529546737671, "logps/generated": -332.09490966796875, "logps/oppo_gen": -72.13301849365234, "logps/oppo_real": -295.51861572265625, "logps/real": -308.53802490234375, "loss": 0.2465, "loss/gen": 0.0, "loss/real": 0.3299695551395416, "rewards/accuracies": 1.0, "rewards/generated": -259.96185302734375, "rewards/margins": 246.94244384765625, "rewards/real": -13.019420623779297, "step": 189 }, { "epoch": 0.79, "grad_norm": 54.30798222723571, "learning_rate": 4.4622507122507117e-07, "logits/generated": -2.0533862113952637, "logits/oppo_generated": -2.7406344413757324, "logits/oppo_real": -2.799593925476074, "logits/real": -2.181865930557251, "logps/generated": -351.9316101074219, "logps/oppo_gen": -102.60955810546875, "logps/oppo_real": -305.8299255371094, "logps/real": -278.4617004394531, "loss": 0.1727, "loss/gen": 0.01339229941368103, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -249.32205200195312, "rewards/margins": 276.6903076171875, "rewards/real": 27.368215560913086, "step": 190 }, { "epoch": 0.8, "grad_norm": 31.494153645830643, "learning_rate": 4.4586894586894584e-07, "logits/generated": -2.32715106010437, "logits/oppo_generated": -2.8220396041870117, "logits/oppo_real": -3.0663821697235107, "logits/real": -2.326672077178955, "logps/generated": -296.7817077636719, "logps/oppo_gen": -80.95722961425781, "logps/oppo_real": -339.0364074707031, "logps/real": -326.7790832519531, "loss": 0.1931, "loss/gen": 0.2630905210971832, "loss/real": 0.00023437291383743286, "rewards/accuracies": 0.875, "rewards/generated": -215.82449340820312, "rewards/margins": 228.08184814453125, "rewards/real": 12.25734806060791, "step": 191 }, { "epoch": 0.8, "grad_norm": 54.0069326966166, "learning_rate": 4.455128205128205e-07, "logits/generated": -2.081082344055176, "logits/oppo_generated": -2.8528313636779785, "logits/oppo_real": -2.9469070434570312, "logits/real": -2.2097737789154053, "logps/generated": -291.0604248046875, "logps/oppo_gen": -55.95906066894531, "logps/oppo_real": -228.37322998046875, "logps/real": -207.25762939453125, "loss": 0.185, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -235.10134887695312, "rewards/margins": 256.2169494628906, "rewards/real": 21.115596771240234, "step": 192 }, { "epoch": 0.81, "grad_norm": 49.09352870335229, "learning_rate": 4.4515669515669514e-07, "logits/generated": -2.103522539138794, "logits/oppo_generated": -2.759657859802246, "logits/oppo_real": -2.7739434242248535, "logits/real": -2.251528263092041, "logps/generated": -327.42047119140625, "logps/oppo_gen": -55.900001525878906, "logps/oppo_real": -240.51673889160156, "logps/real": -249.0882568359375, "loss": 0.197, "loss/gen": 0.0, "loss/real": 0.29675740003585815, "rewards/accuracies": 0.875, "rewards/generated": -271.5204772949219, "rewards/margins": 262.9489440917969, "rewards/real": -8.571529388427734, "step": 193 }, { "epoch": 0.81, "grad_norm": 51.20193296598406, "learning_rate": 4.448005698005698e-07, "logits/generated": -2.0290396213531494, "logits/oppo_generated": -2.714049816131592, "logits/oppo_real": -2.821863889694214, "logits/real": -2.0799052715301514, "logps/generated": -250.7839813232422, "logps/oppo_gen": -61.66150665283203, "logps/oppo_real": -281.81561279296875, "logps/real": -291.30224609375, "loss": 0.2763, "loss/gen": 0.3449553847312927, "loss/real": 0.29574069380760193, "rewards/accuracies": 1.0, "rewards/generated": -189.12246704101562, "rewards/margins": 179.63583374023438, "rewards/real": -9.486623764038086, "step": 194 }, { "epoch": 0.82, "grad_norm": 61.67454112871453, "learning_rate": 4.444444444444444e-07, "logits/generated": -1.8745107650756836, "logits/oppo_generated": -2.7336645126342773, "logits/oppo_real": -2.6636435985565186, "logits/real": -2.19765043258667, "logps/generated": -274.8359375, "logps/oppo_gen": -66.04891204833984, "logps/oppo_real": -343.6158447265625, "logps/real": -310.6270751953125, "loss": 0.2015, "loss/gen": 0.22997678816318512, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -208.78701782226562, "rewards/margins": 241.7758026123047, "rewards/real": 32.98878479003906, "step": 195 }, { "epoch": 0.82, "grad_norm": 53.72602799374224, "learning_rate": 4.4408831908831906e-07, "logits/generated": -2.1320905685424805, "logits/oppo_generated": -3.0542874336242676, "logits/oppo_real": -2.803119659423828, "logits/real": -2.5108633041381836, "logps/generated": -288.25958251953125, "logps/oppo_gen": -81.553955078125, "logps/oppo_real": -376.17071533203125, "logps/real": -342.6852722167969, "loss": 0.2462, "loss/gen": 0.2779223918914795, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -206.70559692382812, "rewards/margins": 240.19102478027344, "rewards/real": 33.485435485839844, "step": 196 }, { "epoch": 0.82, "grad_norm": 25.261345155831098, "learning_rate": 4.4373219373219373e-07, "logits/generated": -2.1422460079193115, "logits/oppo_generated": -2.791293144226074, "logits/oppo_real": -2.8689441680908203, "logits/real": -2.3548340797424316, "logps/generated": -355.26690673828125, "logps/oppo_gen": -90.10079956054688, "logps/oppo_real": -387.6597900390625, "logps/real": -367.0665588378906, "loss": 0.2392, "loss/gen": 0.17274703085422516, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -265.1661071777344, "rewards/margins": 285.75933837890625, "rewards/real": 20.593231201171875, "step": 197 }, { "epoch": 0.83, "grad_norm": 29.314516563430757, "learning_rate": 4.4337606837606836e-07, "logits/generated": -2.09847354888916, "logits/oppo_generated": -2.8356850147247314, "logits/oppo_real": -2.917833089828491, "logits/real": -2.1983418464660645, "logps/generated": -326.11285400390625, "logps/oppo_gen": -76.40264892578125, "logps/oppo_real": -278.172607421875, "logps/real": -253.1783447265625, "loss": 0.1414, "loss/gen": 0.12514609098434448, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -249.710205078125, "rewards/margins": 274.7044677734375, "rewards/real": 24.9942684173584, "step": 198 }, { "epoch": 0.83, "grad_norm": 48.53973837148151, "learning_rate": 4.43019943019943e-07, "logits/generated": -2.2969937324523926, "logits/oppo_generated": -3.0011539459228516, "logits/oppo_real": -3.069876194000244, "logits/real": -2.477539539337158, "logps/generated": -303.3049011230469, "logps/oppo_gen": -69.13575744628906, "logps/oppo_real": -340.70343017578125, "logps/real": -357.84661865234375, "loss": 0.2047, "loss/gen": 0.19307354092597961, "loss/real": 0.3465298116207123, "rewards/accuracies": 1.0, "rewards/generated": -234.16915893554688, "rewards/margins": 217.02597045898438, "rewards/real": -17.143173217773438, "step": 199 }, { "epoch": 0.84, "grad_norm": 40.12700023003137, "learning_rate": 4.4266381766381765e-07, "logits/generated": -2.04579758644104, "logits/oppo_generated": -2.821411609649658, "logits/oppo_real": -2.9697532653808594, "logits/real": -2.3006458282470703, "logps/generated": -352.8709411621094, "logps/oppo_gen": -94.25292205810547, "logps/oppo_real": -449.1705322265625, "logps/real": -422.84283447265625, "loss": 0.134, "loss/gen": 0.19948835670948029, "loss/real": 0.013253934681415558, "rewards/accuracies": 1.0, "rewards/generated": -258.6180419921875, "rewards/margins": 284.94573974609375, "rewards/real": 26.32770347595215, "step": 200 }, { "epoch": 0.84, "grad_norm": 55.41589471188524, "learning_rate": 4.423076923076923e-07, "logits/generated": -2.029297351837158, "logits/oppo_generated": -2.9498441219329834, "logits/oppo_real": -2.889374017715454, "logits/real": -2.3880996704101562, "logps/generated": -370.95904541015625, "logps/oppo_gen": -93.28401184082031, "logps/oppo_real": -446.9027099609375, "logps/real": -425.0364074707031, "loss": 0.1887, "loss/gen": 0.0018385052680969238, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -277.6750183105469, "rewards/margins": 299.541259765625, "rewards/real": 21.866281509399414, "step": 201 }, { "epoch": 0.85, "grad_norm": 56.23495458509325, "learning_rate": 4.4195156695156695e-07, "logits/generated": -1.6252273321151733, "logits/oppo_generated": -2.5877699851989746, "logits/oppo_real": -2.4145617485046387, "logits/real": -1.9977871179580688, "logps/generated": -345.75006103515625, "logps/oppo_gen": -58.147544860839844, "logps/oppo_real": -256.63494873046875, "logps/real": -243.7563018798828, "loss": 0.2266, "loss/gen": 0.0, "loss/real": 0.011003687977790833, "rewards/accuracies": 1.0, "rewards/generated": -287.6025085449219, "rewards/margins": 300.481201171875, "rewards/real": 12.878662109375, "step": 202 }, { "epoch": 0.85, "grad_norm": 66.24101870135868, "learning_rate": 4.4159544159544157e-07, "logits/generated": -2.048314094543457, "logits/oppo_generated": -2.825096607208252, "logits/oppo_real": -2.919394016265869, "logits/real": -2.1845545768737793, "logps/generated": -257.5550842285156, "logps/oppo_gen": -62.71122360229492, "logps/oppo_real": -234.44354248046875, "logps/real": -217.26864624023438, "loss": 0.1972, "loss/gen": 0.24772684276103973, "loss/real": 0.03300228714942932, "rewards/accuracies": 1.0, "rewards/generated": -194.84384155273438, "rewards/margins": 212.01873779296875, "rewards/real": 17.17490005493164, "step": 203 }, { "epoch": 0.85, "grad_norm": 45.537051363422734, "learning_rate": 4.412393162393162e-07, "logits/generated": -2.0483438968658447, "logits/oppo_generated": -2.681910276412964, "logits/oppo_real": -2.8930723667144775, "logits/real": -2.0234827995300293, "logps/generated": -298.2679138183594, "logps/oppo_gen": -69.35714721679688, "logps/oppo_real": -321.68878173828125, "logps/real": -300.1239013671875, "loss": 0.1831, "loss/gen": 0.39896392822265625, "loss/real": 0.027880370616912842, "rewards/accuracies": 1.0, "rewards/generated": -228.91075134277344, "rewards/margins": 250.47564697265625, "rewards/real": 21.564884185791016, "step": 204 }, { "epoch": 0.86, "grad_norm": 51.88816192212102, "learning_rate": 4.4088319088319087e-07, "logits/generated": -1.9750864505767822, "logits/oppo_generated": -2.910146951675415, "logits/oppo_real": -2.842686653137207, "logits/real": -2.2580361366271973, "logps/generated": -364.1310119628906, "logps/oppo_gen": -55.29602813720703, "logps/oppo_real": -188.457763671875, "logps/real": -190.8536834716797, "loss": 0.1574, "loss/gen": 0.0, "loss/real": 0.2326948642730713, "rewards/accuracies": 1.0, "rewards/generated": -308.8349609375, "rewards/margins": 306.4390869140625, "rewards/real": -2.3959202766418457, "step": 205 }, { "epoch": 0.86, "grad_norm": 49.47076354082783, "learning_rate": 4.4052706552706555e-07, "logits/generated": -2.08099365234375, "logits/oppo_generated": -2.9482345581054688, "logits/oppo_real": -3.0109448432922363, "logits/real": -2.36570405960083, "logps/generated": -295.0722961425781, "logps/oppo_gen": -70.6409912109375, "logps/oppo_real": -375.189697265625, "logps/real": -361.89434814453125, "loss": 0.1832, "loss/gen": 0.013277322053909302, "loss/real": 0.03908447176218033, "rewards/accuracies": 1.0, "rewards/generated": -224.4313201904297, "rewards/margins": 237.72665405273438, "rewards/real": 13.295326232910156, "step": 206 }, { "epoch": 0.87, "grad_norm": 23.347891198939145, "learning_rate": 4.4017094017094017e-07, "logits/generated": -2.0084404945373535, "logits/oppo_generated": -2.7811834812164307, "logits/oppo_real": -2.923962116241455, "logits/real": -2.1404595375061035, "logps/generated": -311.7547607421875, "logps/oppo_gen": -71.71026611328125, "logps/oppo_real": -353.846923828125, "logps/real": -363.43988037109375, "loss": 0.1818, "loss/gen": 0.030606284737586975, "loss/real": 0.2629862129688263, "rewards/accuracies": 1.0, "rewards/generated": -240.0445098876953, "rewards/margins": 230.45156860351562, "rewards/real": -9.592939376831055, "step": 207 }, { "epoch": 0.87, "grad_norm": 19.88992382650619, "learning_rate": 4.398148148148148e-07, "logits/generated": -2.0685057640075684, "logits/oppo_generated": -2.8043360710144043, "logits/oppo_real": -3.0211949348449707, "logits/real": -2.244368314743042, "logps/generated": -308.2628173828125, "logps/oppo_gen": -77.71004486083984, "logps/oppo_real": -389.77301025390625, "logps/real": -367.9434509277344, "loss": 0.1625, "loss/gen": 0.025741413235664368, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.55276489257812, "rewards/margins": 252.38232421875, "rewards/real": 21.829570770263672, "step": 208 }, { "epoch": 0.87, "grad_norm": 25.132100938015384, "learning_rate": 4.394586894586894e-07, "logits/generated": -1.9696589708328247, "logits/oppo_generated": -2.7760987281799316, "logits/oppo_real": -2.740163803100586, "logits/real": -2.227613925933838, "logps/generated": -362.2425231933594, "logps/oppo_gen": -88.69313049316406, "logps/oppo_real": -338.8006591796875, "logps/real": -327.68731689453125, "loss": 0.1775, "loss/gen": 0.0, "loss/real": 0.09456821531057358, "rewards/accuracies": 1.0, "rewards/generated": -273.5494384765625, "rewards/margins": 284.6627502441406, "rewards/real": 11.113346099853516, "step": 209 }, { "epoch": 0.88, "grad_norm": 79.51874594730006, "learning_rate": 4.391025641025641e-07, "logits/generated": -1.8997169733047485, "logits/oppo_generated": -2.7127938270568848, "logits/oppo_real": -2.803234577178955, "logits/real": -2.1146082878112793, "logps/generated": -387.34759521484375, "logps/oppo_gen": -85.75541687011719, "logps/oppo_real": -242.4071807861328, "logps/real": -236.056884765625, "loss": 0.1955, "loss/gen": 0.0035225003957748413, "loss/real": 0.07803569734096527, "rewards/accuracies": 1.0, "rewards/generated": -301.59222412109375, "rewards/margins": 307.9425048828125, "rewards/real": 6.350289821624756, "step": 210 }, { "epoch": 0.88, "grad_norm": 82.73605256797819, "learning_rate": 4.3874643874643876e-07, "logits/generated": -2.0518431663513184, "logits/oppo_generated": -2.995426654815674, "logits/oppo_real": -2.8803281784057617, "logits/real": -2.3870060443878174, "logps/generated": -346.0346984863281, "logps/oppo_gen": -68.82854461669922, "logps/oppo_real": -337.844482421875, "logps/real": -309.0092468261719, "loss": 0.1528, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -277.2061767578125, "rewards/margins": 306.04144287109375, "rewards/real": 28.835275650024414, "step": 211 }, { "epoch": 0.89, "grad_norm": 35.13018825672765, "learning_rate": 4.3839031339031333e-07, "logits/generated": -2.083667278289795, "logits/oppo_generated": -2.6126418113708496, "logits/oppo_real": -3.0222294330596924, "logits/real": -2.0695085525512695, "logps/generated": -289.85260009765625, "logps/oppo_gen": -56.36054992675781, "logps/oppo_real": -325.3075256347656, "logps/real": -330.01385498046875, "loss": 0.2521, "loss/gen": 0.009135901927947998, "loss/real": 0.25433236360549927, "rewards/accuracies": 1.0, "rewards/generated": -233.49203491210938, "rewards/margins": 228.78570556640625, "rewards/real": -4.706315994262695, "step": 212 }, { "epoch": 0.89, "grad_norm": 27.606224618649186, "learning_rate": 4.38034188034188e-07, "logits/generated": -2.159607410430908, "logits/oppo_generated": -3.026592254638672, "logits/oppo_real": -2.9974026679992676, "logits/real": -2.492272138595581, "logps/generated": -330.615478515625, "logps/oppo_gen": -81.62860107421875, "logps/oppo_real": -354.01513671875, "logps/real": -344.7056579589844, "loss": 0.2107, "loss/gen": 0.0, "loss/real": 0.024997137486934662, "rewards/accuracies": 1.0, "rewards/generated": -248.98684692382812, "rewards/margins": 258.29632568359375, "rewards/real": 9.309473991394043, "step": 213 }, { "epoch": 0.9, "grad_norm": 15.104872244654826, "learning_rate": 4.376780626780627e-07, "logits/generated": -2.0585901737213135, "logits/oppo_generated": -2.86299991607666, "logits/oppo_real": -2.897392749786377, "logits/real": -2.3068199157714844, "logps/generated": -285.48638916015625, "logps/oppo_gen": -55.654396057128906, "logps/oppo_real": -286.4037170410156, "logps/real": -298.23529052734375, "loss": 0.14, "loss/gen": 0.011369600892066956, "loss/real": 0.27243572473526, "rewards/accuracies": 1.0, "rewards/generated": -229.83200073242188, "rewards/margins": 218.0004425048828, "rewards/real": -11.831571578979492, "step": 214 }, { "epoch": 0.9, "grad_norm": 46.20299554006212, "learning_rate": 4.373219373219373e-07, "logits/generated": -2.1441431045532227, "logits/oppo_generated": -2.8678367137908936, "logits/oppo_real": -2.797013759613037, "logits/real": -2.3208460807800293, "logps/generated": -526.330810546875, "logps/oppo_gen": -154.916748046875, "logps/oppo_real": -268.4582824707031, "logps/real": -262.050537109375, "loss": 0.1611, "loss/gen": 0.0, "loss/real": 0.10568805783987045, "rewards/accuracies": 1.0, "rewards/generated": -371.41400146484375, "rewards/margins": 377.82177734375, "rewards/real": 6.407746315002441, "step": 215 }, { "epoch": 0.9, "grad_norm": 42.97747780678634, "learning_rate": 4.3696581196581193e-07, "logits/generated": -2.335385799407959, "logits/oppo_generated": -2.879833221435547, "logits/oppo_real": -3.0112786293029785, "logits/real": -2.5094590187072754, "logps/generated": -324.74005126953125, "logps/oppo_gen": -96.10844421386719, "logps/oppo_real": -492.59039306640625, "logps/real": -506.662109375, "loss": 0.2189, "loss/gen": 0.263028621673584, "loss/real": 0.28753662109375, "rewards/accuracies": 0.875, "rewards/generated": -228.631591796875, "rewards/margins": 214.55987548828125, "rewards/real": -14.071721076965332, "step": 216 }, { "epoch": 0.91, "grad_norm": 96.03388887522988, "learning_rate": 4.366096866096866e-07, "logits/generated": -2.3405416011810303, "logits/oppo_generated": -2.855457305908203, "logits/oppo_real": -3.161579132080078, "logits/real": -2.4299869537353516, "logps/generated": -266.0459289550781, "logps/oppo_gen": -79.04156494140625, "logps/oppo_real": -508.73779296875, "logps/real": -511.5892333984375, "loss": 0.218, "loss/gen": 0.43865615129470825, "loss/real": 0.2312847524881363, "rewards/accuracies": 0.875, "rewards/generated": -187.00439453125, "rewards/margins": 184.15298461914062, "rewards/real": -2.851390838623047, "step": 217 }, { "epoch": 0.91, "grad_norm": 49.46367136754257, "learning_rate": 4.362535612535612e-07, "logits/generated": -2.3023407459259033, "logits/oppo_generated": -2.8270015716552734, "logits/oppo_real": -2.9884450435638428, "logits/real": -2.3669018745422363, "logps/generated": -310.2086181640625, "logps/oppo_gen": -79.96229553222656, "logps/oppo_real": -295.296630859375, "logps/real": -278.3116455078125, "loss": 0.119, "loss/gen": 0.24770958721637726, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -230.246337890625, "rewards/margins": 247.2313232421875, "rewards/real": 16.985002517700195, "step": 218 }, { "epoch": 0.92, "grad_norm": 54.32108273310534, "learning_rate": 4.358974358974359e-07, "logits/generated": -2.0078024864196777, "logits/oppo_generated": -2.7040886878967285, "logits/oppo_real": -2.816561698913574, "logits/real": -2.148149013519287, "logps/generated": -327.4352111816406, "logps/oppo_gen": -55.71031188964844, "logps/oppo_real": -202.95962524414062, "logps/real": -176.31039428710938, "loss": 0.2428, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -271.72491455078125, "rewards/margins": 298.3741455078125, "rewards/real": 26.649229049682617, "step": 219 }, { "epoch": 0.92, "grad_norm": 38.766303588881314, "learning_rate": 4.355413105413105e-07, "logits/generated": -1.851919412612915, "logits/oppo_generated": -2.385345458984375, "logits/oppo_real": -2.4835422039031982, "logits/real": -1.8688819408416748, "logps/generated": -298.13861083984375, "logps/oppo_gen": -75.58077239990234, "logps/oppo_real": -339.3034973144531, "logps/real": -292.58990478515625, "loss": 0.1566, "loss/gen": 0.2625175714492798, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -222.55783081054688, "rewards/margins": 269.27142333984375, "rewards/real": 46.713592529296875, "step": 220 }, { "epoch": 0.92, "grad_norm": 38.3459545642733, "learning_rate": 4.3518518518518514e-07, "logits/generated": -2.3448195457458496, "logits/oppo_generated": -3.011491060256958, "logits/oppo_real": -3.0487937927246094, "logits/real": -2.592437744140625, "logps/generated": -380.2159118652344, "logps/oppo_gen": -131.22396850585938, "logps/oppo_real": -400.33868408203125, "logps/real": -382.6166687011719, "loss": 0.2109, "loss/gen": 0.0, "loss/real": 0.009188689291477203, "rewards/accuracies": 1.0, "rewards/generated": -248.991943359375, "rewards/margins": 266.71392822265625, "rewards/real": 17.722003936767578, "step": 221 }, { "epoch": 0.93, "grad_norm": 39.05013241714905, "learning_rate": 4.348290598290598e-07, "logits/generated": -2.286303997039795, "logits/oppo_generated": -2.755108118057251, "logits/oppo_real": -2.8694067001342773, "logits/real": -2.2778568267822266, "logps/generated": -286.8591613769531, "logps/oppo_gen": -61.73572540283203, "logps/oppo_real": -230.838134765625, "logps/real": -234.90147399902344, "loss": 0.1586, "loss/gen": 0.23340168595314026, "loss/real": 0.2623848617076874, "rewards/accuracies": 0.875, "rewards/generated": -225.12344360351562, "rewards/margins": 221.0601043701172, "rewards/real": -4.063333511352539, "step": 222 }, { "epoch": 0.93, "grad_norm": 49.061494814543714, "learning_rate": 4.3447293447293444e-07, "logits/generated": -2.1600050926208496, "logits/oppo_generated": -2.8574419021606445, "logits/oppo_real": -2.923137903213501, "logits/real": -2.323585033416748, "logps/generated": -341.39599609375, "logps/oppo_gen": -82.77210998535156, "logps/oppo_real": -252.58892822265625, "logps/real": -277.39813232421875, "loss": 0.1963, "loss/gen": 0.011263325810432434, "loss/real": 0.3864287734031677, "rewards/accuracies": 0.875, "rewards/generated": -258.62384033203125, "rewards/margins": 233.81466674804688, "rewards/real": -24.80919075012207, "step": 223 }, { "epoch": 0.94, "grad_norm": 49.97145181094652, "learning_rate": 4.341168091168091e-07, "logits/generated": -2.0936217308044434, "logits/oppo_generated": -2.994565010070801, "logits/oppo_real": -2.8149280548095703, "logits/real": -2.4390323162078857, "logps/generated": -267.6043395996094, "logps/oppo_gen": -48.2861213684082, "logps/oppo_real": -137.37625122070312, "logps/real": -160.69287109375, "loss": 0.301, "loss/gen": 0.5201160907745361, "loss/real": 0.3812367916107178, "rewards/accuracies": 0.875, "rewards/generated": -219.31820678710938, "rewards/margins": 196.00160217285156, "rewards/real": -23.31661605834961, "step": 224 }, { "epoch": 0.94, "grad_norm": 54.01997514664136, "learning_rate": 4.3376068376068374e-07, "logits/generated": -2.2345826625823975, "logits/oppo_generated": -2.816603422164917, "logits/oppo_real": -2.9343314170837402, "logits/real": -2.339372158050537, "logps/generated": -238.27630615234375, "logps/oppo_gen": -30.44548988342285, "logps/oppo_real": -174.9966278076172, "logps/real": -159.52218627929688, "loss": 0.2248, "loss/gen": 0.07372879981994629, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -207.830810546875, "rewards/margins": 223.30526733398438, "rewards/real": 15.47445297241211, "step": 225 }, { "epoch": 0.95, "grad_norm": 26.33121852189253, "learning_rate": 4.3340455840455836e-07, "logits/generated": -2.161853075027466, "logits/oppo_generated": -2.6415185928344727, "logits/oppo_real": -3.0115818977355957, "logits/real": -2.075220823287964, "logps/generated": -362.1558532714844, "logps/oppo_gen": -93.466064453125, "logps/oppo_real": -340.529296875, "logps/real": -308.37493896484375, "loss": 0.1905, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -268.68975830078125, "rewards/margins": 300.8441162109375, "rewards/real": 32.154354095458984, "step": 226 }, { "epoch": 0.95, "grad_norm": 37.262840566835145, "learning_rate": 4.3304843304843304e-07, "logits/generated": -2.1919541358947754, "logits/oppo_generated": -2.7984108924865723, "logits/oppo_real": -2.9754528999328613, "logits/real": -2.3059372901916504, "logps/generated": -304.6134338378906, "logps/oppo_gen": -69.67858123779297, "logps/oppo_real": -268.7974853515625, "logps/real": -259.7208251953125, "loss": 0.1259, "loss/gen": 0.0012919306755065918, "loss/real": 0.04866264760494232, "rewards/accuracies": 1.0, "rewards/generated": -234.93484497070312, "rewards/margins": 244.01148986816406, "rewards/real": 9.076637268066406, "step": 227 }, { "epoch": 0.95, "grad_norm": 21.304152478843108, "learning_rate": 4.326923076923077e-07, "logits/generated": -2.002150058746338, "logits/oppo_generated": -2.7994847297668457, "logits/oppo_real": -2.687981605529785, "logits/real": -2.2970423698425293, "logps/generated": -286.0855712890625, "logps/oppo_gen": -76.17577362060547, "logps/oppo_real": -381.5020751953125, "logps/real": -376.30322265625, "loss": 0.1755, "loss/gen": 0.2530289888381958, "loss/real": 0.24476546049118042, "rewards/accuracies": 1.0, "rewards/generated": -209.9097900390625, "rewards/margins": 215.108642578125, "rewards/real": 5.198863983154297, "step": 228 }, { "epoch": 0.96, "grad_norm": 26.23410279415064, "learning_rate": 4.323361823361823e-07, "logits/generated": -2.1952879428863525, "logits/oppo_generated": -2.8429031372070312, "logits/oppo_real": -3.0224597454071045, "logits/real": -2.2873964309692383, "logps/generated": -302.1619873046875, "logps/oppo_gen": -78.5534439086914, "logps/oppo_real": -246.5026397705078, "logps/real": -227.45849609375, "loss": 0.1693, "loss/gen": 0.23556922376155853, "loss/real": 0.0011737123131752014, "rewards/accuracies": 1.0, "rewards/generated": -223.60853576660156, "rewards/margins": 242.65267944335938, "rewards/real": 19.044147491455078, "step": 229 }, { "epoch": 0.96, "grad_norm": 30.91998695442349, "learning_rate": 4.3198005698005696e-07, "logits/generated": -1.982604742050171, "logits/oppo_generated": -2.5529236793518066, "logits/oppo_real": -2.7146146297454834, "logits/real": -1.9502203464508057, "logps/generated": -342.57330322265625, "logps/oppo_gen": -79.70944213867188, "logps/oppo_real": -106.01055145263672, "logps/real": -143.1019744873047, "loss": 0.2063, "loss/gen": 0.0, "loss/real": 0.45912817120552063, "rewards/accuracies": 1.0, "rewards/generated": -262.8638610839844, "rewards/margins": 225.77243041992188, "rewards/real": -37.09141540527344, "step": 230 }, { "epoch": 0.97, "grad_norm": 68.66924234145881, "learning_rate": 4.3162393162393163e-07, "logits/generated": -1.856884241104126, "logits/oppo_generated": -2.5894346237182617, "logits/oppo_real": -2.6849865913391113, "logits/real": -1.9867148399353027, "logps/generated": -390.6427001953125, "logps/oppo_gen": -67.09019470214844, "logps/oppo_real": -256.4427185058594, "logps/real": -235.26731872558594, "loss": 0.1923, "loss/gen": 0.02218911051750183, "loss/real": 0.002973802387714386, "rewards/accuracies": 1.0, "rewards/generated": -323.552490234375, "rewards/margins": 344.7278747558594, "rewards/real": 21.17538070678711, "step": 231 }, { "epoch": 0.97, "grad_norm": 25.47653974048355, "learning_rate": 4.3126780626780625e-07, "logits/generated": -2.229971408843994, "logits/oppo_generated": -2.959817886352539, "logits/oppo_real": -2.9362192153930664, "logits/real": -2.4872889518737793, "logps/generated": -313.7774658203125, "logps/oppo_gen": -82.48292541503906, "logps/oppo_real": -458.88818359375, "logps/real": -453.343017578125, "loss": 0.1714, "loss/gen": 0.06390117108821869, "loss/real": 0.09897678345441818, "rewards/accuracies": 1.0, "rewards/generated": -231.29452514648438, "rewards/margins": 236.83969116210938, "rewards/real": 5.545146942138672, "step": 232 }, { "epoch": 0.97, "grad_norm": 44.17547296984351, "learning_rate": 4.309116809116809e-07, "logits/generated": -2.093695878982544, "logits/oppo_generated": -2.7284858226776123, "logits/oppo_real": -2.8326492309570312, "logits/real": -2.21309757232666, "logps/generated": -323.655029296875, "logps/oppo_gen": -60.89936828613281, "logps/oppo_real": -245.58233642578125, "logps/real": -238.11309814453125, "loss": 0.173, "loss/gen": 0.0006021559238433838, "loss/real": 0.018122456967830658, "rewards/accuracies": 1.0, "rewards/generated": -262.75567626953125, "rewards/margins": 270.2248840332031, "rewards/real": 7.469233512878418, "step": 233 }, { "epoch": 0.98, "grad_norm": 33.680569135700296, "learning_rate": 4.3055555555555555e-07, "logits/generated": -2.0983548164367676, "logits/oppo_generated": -2.884782075881958, "logits/oppo_real": -3.007986545562744, "logits/real": -2.3658394813537598, "logps/generated": -301.1651611328125, "logps/oppo_gen": -64.29571533203125, "logps/oppo_real": -445.2386169433594, "logps/real": -415.5571594238281, "loss": 0.1804, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -236.8694610595703, "rewards/margins": 266.5509338378906, "rewards/real": 29.681480407714844, "step": 234 }, { "epoch": 0.98, "grad_norm": 20.284545818732685, "learning_rate": 4.3019943019943017e-07, "logits/generated": -2.0810956954956055, "logits/oppo_generated": -2.8430304527282715, "logits/oppo_real": -2.873483657836914, "logits/real": -2.3326306343078613, "logps/generated": -320.660400390625, "logps/oppo_gen": -68.79239654541016, "logps/oppo_real": -391.89910888671875, "logps/real": -372.5426330566406, "loss": 0.1924, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -251.86802673339844, "rewards/margins": 271.22454833984375, "rewards/real": 19.35650634765625, "step": 235 }, { "epoch": 0.99, "grad_norm": 31.462047177474897, "learning_rate": 4.2984330484330485e-07, "logits/generated": -2.0807740688323975, "logits/oppo_generated": -2.8508265018463135, "logits/oppo_real": -2.9677348136901855, "logits/real": -2.363823890686035, "logps/generated": -337.72705078125, "logps/oppo_gen": -88.43344116210938, "logps/oppo_real": -438.55322265625, "logps/real": -411.73760986328125, "loss": 0.2283, "loss/gen": 0.009022071957588196, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -249.2935791015625, "rewards/margins": 276.10919189453125, "rewards/real": 26.81560516357422, "step": 236 }, { "epoch": 0.99, "grad_norm": 30.65675470245862, "learning_rate": 4.294871794871794e-07, "logits/generated": -2.2450156211853027, "logits/oppo_generated": -2.816070079803467, "logits/oppo_real": -3.012850761413574, "logits/real": -2.3096275329589844, "logps/generated": -328.40545654296875, "logps/oppo_gen": -55.2912483215332, "logps/oppo_real": -255.20977783203125, "logps/real": -242.8808135986328, "loss": 0.1844, "loss/gen": 0.014506042003631592, "loss/real": 0.023922577500343323, "rewards/accuracies": 1.0, "rewards/generated": -273.1142272949219, "rewards/margins": 285.44317626953125, "rewards/real": 12.328951835632324, "step": 237 }, { "epoch": 1.0, "grad_norm": 60.6619853354945, "learning_rate": 4.291310541310541e-07, "logits/generated": -2.2790613174438477, "logits/oppo_generated": -2.701869487762451, "logits/oppo_real": -2.963564872741699, "logits/real": -2.223146915435791, "logps/generated": -349.9393310546875, "logps/oppo_gen": -83.03327941894531, "logps/oppo_real": -312.4057312011719, "logps/real": -290.92095947265625, "loss": 0.1719, "loss/gen": 0.0, "loss/real": 0.0, "rewards/accuracies": 1.0, "rewards/generated": -266.9060363769531, "rewards/margins": 288.39080810546875, "rewards/real": 21.484760284423828, "step": 238 }, { "epoch": 1.0, "grad_norm": 47.305535835704525, "learning_rate": 4.2877492877492877e-07, "logits/generated": -2.2459330558776855, "logits/oppo_generated": -2.8546152114868164, "logits/oppo_real": -3.036848545074463, "logits/real": -2.419419050216675, "logps/generated": -338.7462158203125, "logps/oppo_gen": -75.19477844238281, "logps/oppo_real": -314.191162109375, "logps/real": -302.6313171386719, "loss": 0.1123, "loss/gen": 0.0, "loss/real": 0.11184393614530563, "rewards/accuracies": 1.0, "rewards/generated": -263.5514221191406, "rewards/margins": 275.1112365722656, "rewards/real": 11.559805870056152, "step": 239 } ], "logging_steps": 1.0, "max_steps": 1434, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "total_flos": 0.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }