diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,10068 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 1.0, + "eval_steps": 100.0, + "global_step": 478, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.0, + "grad_norm": 18.88450857996153, + "learning_rate": 1.0416666666666666e-08, + "logits/generated": -2.8085083961486816, + "logits/oppo_generated": -2.8376712799072266, + "logits/oppo_real": -2.8085083961486816, + "logits/real": -2.8376712799072266, + "logps/generated": -72.26029968261719, + "logps/oppo_gen": -72.26029968261719, + "logps/oppo_real": -321.1210021972656, + "logps/real": -321.1210021972656, + "loss": 1.9028, + "loss/gen": 1.7014132738113403, + "loss/real": 0.20141328871250153, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 1 + }, + { + "epoch": 0.0, + "grad_norm": 20.256159846297372, + "learning_rate": 2.083333333333333e-08, + "logits/generated": -2.558225154876709, + "logits/oppo_generated": -2.680725574493408, + "logits/oppo_real": -2.558225154876709, + "logits/real": -2.680725574493408, + "logps/generated": -77.56204223632812, + "logps/oppo_gen": -77.56204223632812, + "logps/oppo_real": -309.978271484375, + "logps/real": -309.978271484375, + "loss": 1.9028, + "loss/gen": 1.7014132738113403, + "loss/real": 0.20141328871250153, + "rewards/accuracies": 0.0, + "rewards/generated": 0.0, + "rewards/margins": 0.0, + "rewards/real": 0.0, + "step": 2 + }, + { + "epoch": 0.01, + "grad_norm": 20.0893982821747, + "learning_rate": 3.125e-08, + "logits/generated": -2.7066292762756348, + "logits/oppo_generated": -2.7376646995544434, + "logits/oppo_real": -2.707059860229492, + "logits/real": -2.7373762130737305, + "logps/generated": -96.45381164550781, + "logps/oppo_gen": -96.59260559082031, + "logps/oppo_real": -345.4535827636719, + "logps/real": -345.49041748046875, + "loss": 1.903, + "loss/gen": 1.7025485038757324, + "loss/real": 0.20148169994354248, + "rewards/accuracies": 0.3125, + "rewards/generated": 0.13880300521850586, + "rewards/margins": -0.17563974857330322, + "rewards/real": -0.03683674335479736, + "step": 3 + }, + { + "epoch": 0.01, + "grad_norm": 18.968951103237924, + "learning_rate": 4.166666666666666e-08, + "logits/generated": -2.8126296997070312, + "logits/oppo_generated": -2.7261557579040527, + "logits/oppo_real": -2.8125224113464355, + "logits/real": -2.7257490158081055, + "logps/generated": -69.03158569335938, + "logps/oppo_gen": -69.00636291503906, + "logps/oppo_real": -227.8314208984375, + "logps/real": -227.7684326171875, + "loss": 1.9026, + "loss/gen": 1.7012073993682861, + "loss/real": 0.20130020380020142, + "rewards/accuracies": 0.6875, + "rewards/generated": -0.02523219585418701, + "rewards/margins": 0.08823049068450928, + "rewards/real": 0.06299829483032227, + "step": 4 + }, + { + "epoch": 0.01, + "grad_norm": 20.702301723017644, + "learning_rate": 5.208333333333333e-08, + "logits/generated": -2.839081287384033, + "logits/oppo_generated": -2.9415202140808105, + "logits/oppo_real": -2.8390822410583496, + "logits/real": -2.9421164989471436, + "logps/generated": -73.87603759765625, + "logps/oppo_gen": -73.82575988769531, + "logps/oppo_real": -342.5164794921875, + "logps/real": -342.60211181640625, + "loss": 1.9028, + "loss/gen": 1.7010023593902588, + "loss/real": 0.20157013833522797, + "rewards/accuracies": 0.5, + "rewards/generated": -0.050284504890441895, + "rewards/margins": -0.035373032093048096, + "rewards/real": -0.08565753698348999, + "step": 5 + }, + { + "epoch": 0.01, + "grad_norm": 18.434664440323935, + "learning_rate": 6.25e-08, + "logits/generated": -2.6757049560546875, + "logits/oppo_generated": -2.6438450813293457, + "logits/oppo_real": -2.675816059112549, + "logits/real": -2.6441245079040527, + "logps/generated": -86.78526306152344, + "logps/oppo_gen": -86.71319580078125, + "logps/oppo_real": -326.2207946777344, + "logps/real": -326.25872802734375, + "loss": 1.9024, + "loss/gen": 1.70082426071167, + "loss/real": 0.20148399472236633, + "rewards/accuracies": 0.625, + "rewards/generated": -0.07205724716186523, + "rewards/margins": 0.03410625457763672, + "rewards/real": -0.037950992584228516, + "step": 6 + }, + { + "epoch": 0.01, + "grad_norm": 19.57938365741621, + "learning_rate": 7.291666666666667e-08, + "logits/generated": -2.7598161697387695, + "logits/oppo_generated": -2.8367371559143066, + "logits/oppo_real": -2.759450912475586, + "logits/real": -2.837430953979492, + "logps/generated": -71.20977783203125, + "logps/oppo_gen": -71.09225463867188, + "logps/oppo_real": -338.93975830078125, + "logps/real": -339.0378112792969, + "loss": 1.9023, + "loss/gen": 1.7004528045654297, + "loss/real": 0.20159286260604858, + "rewards/accuracies": 0.5625, + "rewards/generated": -0.117523193359375, + "rewards/margins": 0.01949334144592285, + "rewards/real": -0.09802985191345215, + "step": 7 + }, + { + "epoch": 0.02, + "grad_norm": 18.098258660786524, + "learning_rate": 8.333333333333333e-08, + "logits/generated": -2.9020438194274902, + "logits/oppo_generated": -2.737520694732666, + "logits/oppo_real": -2.9021873474121094, + "logits/real": -2.738926410675049, + "logps/generated": -57.77384948730469, + "logps/oppo_gen": -57.62103271484375, + "logps/oppo_real": -273.72210693359375, + "logps/real": -273.86004638671875, + "loss": 1.9019, + "loss/gen": 1.7001641988754272, + "loss/real": 0.20166659355163574, + "rewards/accuracies": 0.6875, + "rewards/generated": -0.1528165340423584, + "rewards/margins": 0.014842987060546875, + "rewards/real": -0.13797354698181152, + "step": 8 + }, + { + "epoch": 0.02, + "grad_norm": 18.096628859382037, + "learning_rate": 9.375e-08, + "logits/generated": -2.5104784965515137, + "logits/oppo_generated": -2.585773468017578, + "logits/oppo_real": -2.5103254318237305, + "logits/real": -2.5853567123413086, + "logps/generated": -56.835174560546875, + "logps/oppo_gen": -56.53358459472656, + "logps/oppo_real": -179.40626525878906, + "logps/real": -179.69265747070312, + "loss": 1.9006, + "loss/gen": 1.6989485025405884, + "loss/real": 0.2019369751214981, + "rewards/accuracies": 0.5, + "rewards/generated": -0.3015878200531006, + "rewards/margins": 0.015174269676208496, + "rewards/real": -0.2864135503768921, + "step": 9 + }, + { + "epoch": 0.02, + "grad_norm": 19.683420491274582, + "learning_rate": 1.0416666666666667e-07, + "logits/generated": -2.6931896209716797, + "logits/oppo_generated": -2.686525344848633, + "logits/oppo_real": -2.6955156326293945, + "logits/real": -2.6843934059143066, + "logps/generated": -62.24772262573242, + "logps/oppo_gen": -61.876277923583984, + "logps/oppo_real": -247.48609924316406, + "logps/real": -247.6177520751953, + "loss": 1.8999, + "loss/gen": 1.698378086090088, + "loss/real": 0.20165444910526276, + "rewards/accuracies": 0.75, + "rewards/generated": -0.3714407682418823, + "rewards/margins": 0.23977923393249512, + "rewards/real": -0.1316615343093872, + "step": 10 + }, + { + "epoch": 0.02, + "grad_norm": 17.437652259079883, + "learning_rate": 1.1458333333333332e-07, + "logits/generated": -2.916860818862915, + "logits/oppo_generated": -2.79710054397583, + "logits/oppo_real": -2.9184556007385254, + "logits/real": -2.795858383178711, + "logps/generated": -77.727783203125, + "logps/oppo_gen": -77.069091796875, + "logps/oppo_real": -262.61822509765625, + "logps/real": -262.8753662109375, + "loss": 1.8994, + "loss/gen": 1.6960327625274658, + "loss/real": 0.20188409090042114, + "rewards/accuracies": 0.6875, + "rewards/generated": -0.6587002277374268, + "rewards/margins": 0.40155017375946045, + "rewards/real": -0.2571500539779663, + "step": 11 + }, + { + "epoch": 0.03, + "grad_norm": 18.252566640471244, + "learning_rate": 1.25e-07, + "logits/generated": -2.99216890335083, + "logits/oppo_generated": -2.744597911834717, + "logits/oppo_real": -2.9952921867370605, + "logits/real": -2.742313861846924, + "logps/generated": -61.30070114135742, + "logps/oppo_gen": -60.12853240966797, + "logps/oppo_real": -190.38400268554688, + "logps/real": -191.3928680419922, + "loss": 1.8945, + "loss/gen": 1.6918413639068604, + "loss/real": 0.20326292514801025, + "rewards/accuracies": 0.5, + "rewards/generated": -1.1721669435501099, + "rewards/margins": 0.16329419612884521, + "rewards/real": -1.0088727474212646, + "step": 12 + }, + { + "epoch": 0.03, + "grad_norm": 18.974558611775894, + "learning_rate": 1.3541666666666666e-07, + "logits/generated": -2.887937545776367, + "logits/oppo_generated": -2.9385900497436523, + "logits/oppo_real": -2.8927111625671387, + "logits/real": -2.933300256729126, + "logps/generated": -86.03516387939453, + "logps/oppo_gen": -84.29090118408203, + "logps/oppo_real": -449.3851318359375, + "logps/real": -450.25927734375, + "loss": 1.8918, + "loss/gen": 1.687178134918213, + "loss/real": 0.2030172348022461, + "rewards/accuracies": 0.875, + "rewards/generated": -1.744260549545288, + "rewards/margins": 0.8700805902481079, + "rewards/real": -0.8741799592971802, + "step": 13 + }, + { + "epoch": 0.03, + "grad_norm": 18.025631774750817, + "learning_rate": 1.4583333333333335e-07, + "logits/generated": -2.9185471534729004, + "logits/oppo_generated": -2.829657793045044, + "logits/oppo_real": -2.9221749305725098, + "logits/real": -2.8240890502929688, + "logps/generated": -90.33718872070312, + "logps/oppo_gen": -88.6211166381836, + "logps/oppo_real": -346.31817626953125, + "logps/real": -347.5406494140625, + "loss": 1.8906, + "loss/gen": 1.6874079704284668, + "loss/real": 0.20365647971630096, + "rewards/accuracies": 0.9375, + "rewards/generated": -1.716069221496582, + "rewards/margins": 0.4936128854751587, + "rewards/real": -1.2224563360214233, + "step": 14 + }, + { + "epoch": 0.03, + "grad_norm": 18.862430656816294, + "learning_rate": 1.5624999999999999e-07, + "logits/generated": -2.743849754333496, + "logits/oppo_generated": -2.6369616985321045, + "logits/oppo_real": -2.745821475982666, + "logits/real": -2.6341629028320312, + "logps/generated": -60.22393798828125, + "logps/oppo_gen": -58.13185501098633, + "logps/oppo_real": -183.13169860839844, + "logps/real": -184.82443237304688, + "loss": 1.8892, + "loss/gen": 1.6843458414077759, + "loss/real": 0.20452776551246643, + "rewards/accuracies": 0.6875, + "rewards/generated": -2.09208607673645, + "rewards/margins": 0.39935922622680664, + "rewards/real": -1.6927268505096436, + "step": 15 + }, + { + "epoch": 0.03, + "grad_norm": 18.85043818523566, + "learning_rate": 1.6666666666666665e-07, + "logits/generated": -2.775763750076294, + "logits/oppo_generated": -2.8875584602355957, + "logits/oppo_real": -2.7887091636657715, + "logits/real": -2.872068405151367, + "logps/generated": -93.86468505859375, + "logps/oppo_gen": -89.01815795898438, + "logps/oppo_real": -355.2412414550781, + "logps/real": -358.21697998046875, + "loss": 1.8728, + "loss/gen": 1.661976933479309, + "loss/real": 0.2069297432899475, + "rewards/accuracies": 0.9375, + "rewards/generated": -4.846524238586426, + "rewards/margins": 1.8708148002624512, + "rewards/real": -2.9757091999053955, + "step": 16 + }, + { + "epoch": 0.04, + "grad_norm": 18.85426576142237, + "learning_rate": 1.7708333333333334e-07, + "logits/generated": -2.926854133605957, + "logits/oppo_generated": -2.791188955307007, + "logits/oppo_real": -2.941716194152832, + "logits/real": -2.772773265838623, + "logps/generated": -83.66239929199219, + "logps/oppo_gen": -78.4744873046875, + "logps/oppo_real": -372.87506103515625, + "logps/real": -375.8855285644531, + "loss": 1.867, + "loss/gen": 1.659225583076477, + "loss/real": 0.2069997638463974, + "rewards/accuracies": 1.0, + "rewards/generated": -5.187913417816162, + "rewards/margins": 2.1774630546569824, + "rewards/real": -3.0104501247406006, + "step": 17 + }, + { + "epoch": 0.04, + "grad_norm": 18.277145472716995, + "learning_rate": 1.875e-07, + "logits/generated": -2.619422197341919, + "logits/oppo_generated": -2.6162495613098145, + "logits/oppo_real": -2.63336443901062, + "logits/real": -2.597346782684326, + "logps/generated": -91.84591674804688, + "logps/oppo_gen": -85.63116455078125, + "logps/oppo_real": -327.649169921875, + "logps/real": -331.60296630859375, + "loss": 1.861, + "loss/gen": 1.6509480476379395, + "loss/real": 0.20878931879997253, + "rewards/accuracies": 0.8125, + "rewards/generated": -6.214766502380371, + "rewards/margins": 2.260969638824463, + "rewards/real": -3.953796863555908, + "step": 18 + }, + { + "epoch": 0.04, + "grad_norm": 17.27219102382995, + "learning_rate": 1.9791666666666664e-07, + "logits/generated": -2.609903335571289, + "logits/oppo_generated": -2.712047576904297, + "logits/oppo_real": -2.627098321914673, + "logits/real": -2.693801164627075, + "logps/generated": -86.11844635009766, + "logps/oppo_gen": -79.7235107421875, + "logps/oppo_real": -208.48953247070312, + "logps/real": -212.92434692382812, + "loss": 1.8585, + "loss/gen": 1.64948308467865, + "loss/real": 0.20968025922775269, + "rewards/accuracies": 1.0, + "rewards/generated": -6.394937515258789, + "rewards/margins": 1.9601283073425293, + "rewards/real": -4.434809684753418, + "step": 19 + }, + { + "epoch": 0.04, + "grad_norm": 18.456748096373012, + "learning_rate": 2.0833333333333333e-07, + "logits/generated": -2.788766384124756, + "logits/oppo_generated": -2.751258373260498, + "logits/oppo_real": -2.8082375526428223, + "logits/real": -2.7272610664367676, + "logps/generated": -74.62376403808594, + "logps/oppo_gen": -67.47988891601562, + "logps/oppo_real": -222.2954864501953, + "logps/real": -227.22134399414062, + "loss": 1.8499, + "loss/gen": 1.6434563398361206, + "loss/real": 0.2105972170829773, + "rewards/accuracies": 0.9375, + "rewards/generated": -7.143871307373047, + "rewards/margins": 2.2180023193359375, + "rewards/real": -4.925868988037109, + "step": 20 + }, + { + "epoch": 0.04, + "grad_norm": 18.999819302954485, + "learning_rate": 2.1875e-07, + "logits/generated": -2.950169086456299, + "logits/oppo_generated": -2.8267569541931152, + "logits/oppo_real": -2.9816665649414062, + "logits/real": -2.7851290702819824, + "logps/generated": -86.47335815429688, + "logps/oppo_gen": -74.723388671875, + "logps/oppo_real": -304.63775634765625, + "logps/real": -312.2291259765625, + "loss": 1.8274, + "loss/gen": 1.6065895557403564, + "loss/real": 0.2158581167459488, + "rewards/accuracies": 0.9375, + "rewards/generated": -11.74997615814209, + "rewards/margins": 4.158588409423828, + "rewards/real": -7.59138822555542, + "step": 21 + }, + { + "epoch": 0.05, + "grad_norm": 17.589315014677144, + "learning_rate": 2.2916666666666663e-07, + "logits/generated": -2.783734083175659, + "logits/oppo_generated": -2.7697672843933105, + "logits/oppo_real": -2.8284473419189453, + "logits/real": -2.7101640701293945, + "logps/generated": -88.10362243652344, + "logps/oppo_gen": -71.9073715209961, + "logps/oppo_real": -272.13043212890625, + "logps/real": -283.3026123046875, + "loss": 1.8022, + "loss/gen": 1.5712285041809082, + "loss/real": 0.22294577956199646, + "rewards/accuracies": 0.9375, + "rewards/generated": -16.19625473022461, + "rewards/margins": 5.024064540863037, + "rewards/real": -11.172189712524414, + "step": 22 + }, + { + "epoch": 0.05, + "grad_norm": 19.18362119156559, + "learning_rate": 2.3958333333333335e-07, + "logits/generated": -3.011516571044922, + "logits/oppo_generated": -2.745267391204834, + "logits/oppo_real": -3.053928852081299, + "logits/real": -2.679607391357422, + "logps/generated": -95.36741638183594, + "logps/oppo_gen": -75.51863098144531, + "logps/oppo_real": -319.1461486816406, + "logps/real": -332.5950927734375, + "loss": 1.7799, + "loss/gen": 1.5424509048461914, + "loss/real": 0.22752627730369568, + "rewards/accuracies": 1.0, + "rewards/generated": -19.848791122436523, + "rewards/margins": 6.399872303009033, + "rewards/real": -13.448917388916016, + "step": 23 + }, + { + "epoch": 0.05, + "grad_norm": 17.90351695029368, + "learning_rate": 2.5e-07, + "logits/generated": -2.585099697113037, + "logits/oppo_generated": -2.7807955741882324, + "logits/oppo_real": -2.645164966583252, + "logits/real": -2.711965799331665, + "logps/generated": -89.25286102294922, + "logps/oppo_gen": -71.00718688964844, + "logps/oppo_real": -250.61138916015625, + "logps/real": -262.8109130859375, + "loss": 1.7768, + "loss/gen": 1.554997444152832, + "loss/real": 0.22490805387496948, + "rewards/accuracies": 1.0, + "rewards/generated": -18.245668411254883, + "rewards/margins": 6.046163558959961, + "rewards/real": -12.199504852294922, + "step": 24 + }, + { + "epoch": 0.05, + "grad_norm": 18.1587231735858, + "learning_rate": 2.604166666666667e-07, + "logits/generated": -2.7362494468688965, + "logits/oppo_generated": -2.7489709854125977, + "logits/oppo_real": -2.7923738956451416, + "logits/real": -2.6788840293884277, + "logps/generated": -75.5267333984375, + "logps/oppo_gen": -55.94059371948242, + "logps/oppo_real": -216.20614624023438, + "logps/real": -232.888916015625, + "loss": 1.7646, + "loss/gen": 1.544620156288147, + "loss/real": 0.23424160480499268, + "rewards/accuracies": 0.8125, + "rewards/generated": -19.58614158630371, + "rewards/margins": 2.9033803939819336, + "rewards/real": -16.68276023864746, + "step": 25 + }, + { + "epoch": 0.05, + "grad_norm": 19.878306371745666, + "learning_rate": 2.708333333333333e-07, + "logits/generated": -2.8480935096740723, + "logits/oppo_generated": -2.759331703186035, + "logits/oppo_real": -2.911282539367676, + "logits/real": -2.6749706268310547, + "logps/generated": -110.72230529785156, + "logps/oppo_gen": -87.22213745117188, + "logps/oppo_real": -327.4888000488281, + "logps/real": -345.6739196777344, + "loss": 1.7452, + "loss/gen": 1.5140717029571533, + "loss/real": 0.2376970797777176, + "rewards/accuracies": 0.875, + "rewards/generated": -23.50015640258789, + "rewards/margins": 5.315024375915527, + "rewards/real": -18.18513298034668, + "step": 26 + }, + { + "epoch": 0.06, + "grad_norm": 20.7959915993819, + "learning_rate": 2.8125e-07, + "logits/generated": -2.706425666809082, + "logits/oppo_generated": -2.980116605758667, + "logits/oppo_real": -2.7865042686462402, + "logits/real": -2.8899600505828857, + "logps/generated": -115.33242797851562, + "logps/oppo_gen": -87.22333526611328, + "logps/oppo_real": -424.35565185546875, + "logps/real": -440.6988525390625, + "loss": 1.7307, + "loss/gen": 1.4784982204437256, + "loss/real": 0.23403891921043396, + "rewards/accuracies": 1.0, + "rewards/generated": -28.109092712402344, + "rewards/margins": 11.765887260437012, + "rewards/real": -16.343204498291016, + "step": 27 + }, + { + "epoch": 0.06, + "grad_norm": 22.86794284710791, + "learning_rate": 2.916666666666667e-07, + "logits/generated": -2.640286922454834, + "logits/oppo_generated": -2.8235785961151123, + "logits/oppo_real": -2.736990451812744, + "logits/real": -2.7280538082122803, + "logps/generated": -107.66032409667969, + "logps/oppo_gen": -73.19400024414062, + "logps/oppo_real": -275.0092468261719, + "logps/real": -299.7300720214844, + "loss": 1.6961, + "loss/gen": 1.429884910583496, + "loss/real": 0.2523398995399475, + "rewards/accuracies": 0.875, + "rewards/generated": -34.46632385253906, + "rewards/margins": 9.745501518249512, + "rewards/real": -24.720821380615234, + "step": 28 + }, + { + "epoch": 0.06, + "grad_norm": 22.34167248864426, + "learning_rate": 3.020833333333333e-07, + "logits/generated": -2.7891035079956055, + "logits/oppo_generated": -2.7930147647857666, + "logits/oppo_real": -2.9117727279663086, + "logits/real": -2.663120746612549, + "logps/generated": -125.35220336914062, + "logps/oppo_gen": -80.30975341796875, + "logps/oppo_real": -230.66831970214844, + "logps/real": -268.874267578125, + "loss": 1.6126, + "loss/gen": 1.3512537479400635, + "loss/real": 0.28433263301849365, + "rewards/accuracies": 0.9375, + "rewards/generated": -45.04244613647461, + "rewards/margins": 6.836463928222656, + "rewards/real": -38.20598220825195, + "step": 29 + }, + { + "epoch": 0.06, + "grad_norm": 19.833808871854, + "learning_rate": 3.1249999999999997e-07, + "logits/generated": -2.690408229827881, + "logits/oppo_generated": -2.6976568698883057, + "logits/oppo_real": -2.8616366386413574, + "logits/real": -2.543367862701416, + "logps/generated": -142.45785522460938, + "logps/oppo_gen": -77.94517517089844, + "logps/oppo_real": -253.06488037109375, + "logps/real": -304.15667724609375, + "loss": 1.5531, + "loss/gen": 1.2099614143371582, + "loss/real": 0.317098468542099, + "rewards/accuracies": 0.9375, + "rewards/generated": -64.51268005371094, + "rewards/margins": 13.420904159545898, + "rewards/real": -51.09177780151367, + "step": 30 + }, + { + "epoch": 0.06, + "grad_norm": 19.015498214963532, + "learning_rate": 3.2291666666666666e-07, + "logits/generated": -2.32857084274292, + "logits/oppo_generated": -2.3693275451660156, + "logits/oppo_real": -2.4662370681762695, + "logits/real": -2.223062038421631, + "logps/generated": -140.2643585205078, + "logps/oppo_gen": -67.86835479736328, + "logps/oppo_real": -219.30712890625, + "logps/real": -277.8334655761719, + "loss": 1.487, + "loss/gen": 1.1561675071716309, + "loss/real": 0.33837342262268066, + "rewards/accuracies": 0.875, + "rewards/generated": -72.39601135253906, + "rewards/margins": 13.869674682617188, + "rewards/real": -58.526336669921875, + "step": 31 + }, + { + "epoch": 0.07, + "grad_norm": 17.094015115917124, + "learning_rate": 3.333333333333333e-07, + "logits/generated": -2.578625202178955, + "logits/oppo_generated": -2.723201274871826, + "logits/oppo_real": -2.7767179012298584, + "logits/real": -2.5331850051879883, + "logps/generated": -151.8267364501953, + "logps/oppo_gen": -71.2327880859375, + "logps/oppo_real": -306.9924011230469, + "logps/real": -375.8905029296875, + "loss": 1.4524, + "loss/gen": 1.1011745929718018, + "loss/real": 0.368760347366333, + "rewards/accuracies": 0.875, + "rewards/generated": -80.59394073486328, + "rewards/margins": 11.695858001708984, + "rewards/real": -68.89808654785156, + "step": 32 + }, + { + "epoch": 0.07, + "grad_norm": 15.386577434137806, + "learning_rate": 3.4375e-07, + "logits/generated": -2.610579013824463, + "logits/oppo_generated": -2.8122611045837402, + "logits/oppo_real": -2.822584629058838, + "logits/real": -2.614462375640869, + "logps/generated": -169.17803955078125, + "logps/oppo_gen": -74.99469757080078, + "logps/oppo_real": -295.076904296875, + "logps/real": -373.369384765625, + "loss": 1.4379, + "loss/gen": 1.0121688842773438, + "loss/real": 0.3995278775691986, + "rewards/accuracies": 0.875, + "rewards/generated": -94.18334197998047, + "rewards/margins": 15.890843391418457, + "rewards/real": -78.2925033569336, + "step": 33 + }, + { + "epoch": 0.07, + "grad_norm": 15.728951573532772, + "learning_rate": 3.541666666666667e-07, + "logits/generated": -2.2474634647369385, + "logits/oppo_generated": -2.425895929336548, + "logits/oppo_real": -2.4456372261047363, + "logits/real": -2.2315173149108887, + "logps/generated": -156.623291015625, + "logps/oppo_gen": -62.23015594482422, + "logps/oppo_real": -248.11083984375, + "logps/real": -324.737548828125, + "loss": 1.3863, + "loss/gen": 1.011872410774231, + "loss/real": 0.39403319358825684, + "rewards/accuracies": 0.9375, + "rewards/generated": -94.39312744140625, + "rewards/margins": 17.766422271728516, + "rewards/real": -76.626708984375, + "step": 34 + }, + { + "epoch": 0.07, + "grad_norm": 16.02933768900159, + "learning_rate": 3.645833333333333e-07, + "logits/generated": -2.539496421813965, + "logits/oppo_generated": -2.8763086795806885, + "logits/oppo_real": -2.7546887397766113, + "logits/real": -2.63142728805542, + "logps/generated": -212.8411865234375, + "logps/oppo_gen": -98.69316101074219, + "logps/oppo_real": -410.1753234863281, + "logps/real": -498.81109619140625, + "loss": 1.3634, + "loss/gen": 0.8914888501167297, + "loss/real": 0.43470653891563416, + "rewards/accuracies": 1.0, + "rewards/generated": -114.14802551269531, + "rewards/margins": 25.512226104736328, + "rewards/real": -88.63580322265625, + "step": 35 + }, + { + "epoch": 0.08, + "grad_norm": 15.791104886304613, + "learning_rate": 3.75e-07, + "logits/generated": -2.6440393924713135, + "logits/oppo_generated": -2.7524499893188477, + "logits/oppo_real": -2.870028018951416, + "logits/real": -2.501885175704956, + "logps/generated": -183.30636596679688, + "logps/oppo_gen": -62.94434356689453, + "logps/oppo_real": -279.46759033203125, + "logps/real": -381.0328369140625, + "loss": 1.3279, + "loss/gen": 0.8554569482803345, + "loss/real": 0.4819982051849365, + "rewards/accuracies": 0.875, + "rewards/generated": -120.36199951171875, + "rewards/margins": 18.79672622680664, + "rewards/real": -101.56527709960938, + "step": 36 + }, + { + "epoch": 0.08, + "grad_norm": 12.472073296155996, + "learning_rate": 3.8541666666666665e-07, + "logits/generated": -2.3085546493530273, + "logits/oppo_generated": -2.651912212371826, + "logits/oppo_real": -2.5685677528381348, + "logits/real": -2.394282817840576, + "logps/generated": -216.96817016601562, + "logps/oppo_gen": -81.71993255615234, + "logps/oppo_real": -309.4556884765625, + "logps/real": -421.0320739746094, + "loss": 1.2951, + "loss/gen": 0.7755213975906372, + "loss/real": 0.5243090391159058, + "rewards/accuracies": 0.9375, + "rewards/generated": -135.24826049804688, + "rewards/margins": 23.671871185302734, + "rewards/real": -111.57638549804688, + "step": 37 + }, + { + "epoch": 0.08, + "grad_norm": 10.256856346455107, + "learning_rate": 3.958333333333333e-07, + "logits/generated": -2.6325173377990723, + "logits/oppo_generated": -2.9185829162597656, + "logits/oppo_real": -2.9221343994140625, + "logits/real": -2.6381330490112305, + "logps/generated": -244.53273010253906, + "logps/oppo_gen": -81.54811096191406, + "logps/oppo_real": -367.9358215332031, + "logps/real": -507.13909912109375, + "loss": 1.2766, + "loss/gen": 0.633612871170044, + "loss/real": 0.6428331136703491, + "rewards/accuracies": 0.75, + "rewards/generated": -162.984619140625, + "rewards/margins": 23.781320571899414, + "rewards/real": -139.20330810546875, + "step": 38 + }, + { + "epoch": 0.08, + "grad_norm": 9.021470209652767, + "learning_rate": 4.0625e-07, + "logits/generated": -2.6545820236206055, + "logits/oppo_generated": -2.6453075408935547, + "logits/oppo_real": -2.9239563941955566, + "logits/real": -2.370694637298584, + "logps/generated": -236.71627807617188, + "logps/oppo_gen": -69.3997802734375, + "logps/oppo_real": -195.3793487548828, + "logps/real": -335.74542236328125, + "loss": 1.2783, + "loss/gen": 0.6135517358779907, + "loss/real": 0.6481477618217468, + "rewards/accuracies": 1.0, + "rewards/generated": -167.3164825439453, + "rewards/margins": 26.950382232666016, + "rewards/real": -140.3660888671875, + "step": 39 + }, + { + "epoch": 0.08, + "grad_norm": 9.477091020530814, + "learning_rate": 4.1666666666666667e-07, + "logits/generated": -2.1519742012023926, + "logits/oppo_generated": -2.5346462726593018, + "logits/oppo_real": -2.469729423522949, + "logits/real": -2.239739418029785, + "logps/generated": -285.01654052734375, + "logps/oppo_gen": -107.60747528076172, + "logps/oppo_real": -305.470703125, + "logps/real": -444.7705078125, + "loss": 1.2708, + "loss/gen": 0.5709100365638733, + "loss/real": 0.6454157829284668, + "rewards/accuracies": 1.0, + "rewards/generated": -177.4090576171875, + "rewards/margins": 38.10923767089844, + "rewards/real": -139.29981994628906, + "step": 40 + }, + { + "epoch": 0.09, + "grad_norm": 9.641810018209553, + "learning_rate": 4.270833333333333e-07, + "logits/generated": -2.347841739654541, + "logits/oppo_generated": -2.7028918266296387, + "logits/oppo_real": -2.7112483978271484, + "logits/real": -2.375851631164551, + "logps/generated": -244.73318481445312, + "logps/oppo_gen": -65.4095687866211, + "logps/oppo_real": -249.814208984375, + "logps/real": -396.6846923828125, + "loss": 1.2691, + "loss/gen": 0.562430739402771, + "loss/real": 0.6824460029602051, + "rewards/accuracies": 0.9375, + "rewards/generated": -179.3236083984375, + "rewards/margins": 32.453147888183594, + "rewards/real": -146.87045288085938, + "step": 41 + }, + { + "epoch": 0.09, + "grad_norm": 10.853774128928466, + "learning_rate": 4.375e-07, + "logits/generated": -2.490652084350586, + "logits/oppo_generated": -2.747931957244873, + "logits/oppo_real": -2.890165328979492, + "logits/real": -2.3857312202453613, + "logps/generated": -266.1440734863281, + "logps/oppo_gen": -75.59771728515625, + "logps/oppo_real": -321.30108642578125, + "logps/real": -481.9388122558594, + "loss": 1.2527, + "loss/gen": 0.516680121421814, + "loss/real": 0.7500206828117371, + "rewards/accuracies": 0.875, + "rewards/generated": -190.54635620117188, + "rewards/margins": 29.908649444580078, + "rewards/real": -160.63771057128906, + "step": 42 + }, + { + "epoch": 0.09, + "grad_norm": 11.695829799222576, + "learning_rate": 4.479166666666667e-07, + "logits/generated": -2.4598608016967773, + "logits/oppo_generated": -2.7500972747802734, + "logits/oppo_real": -2.8781628608703613, + "logits/real": -2.4157962799072266, + "logps/generated": -250.79949951171875, + "logps/oppo_gen": -61.141502380371094, + "logps/oppo_real": -208.20816040039062, + "logps/real": -371.35186767578125, + "loss": 1.2732, + "loss/gen": 0.5206349492073059, + "loss/real": 0.7657498121261597, + "rewards/accuracies": 0.8125, + "rewards/generated": -189.65798950195312, + "rewards/margins": 26.514284133911133, + "rewards/real": -163.1437225341797, + "step": 43 + }, + { + "epoch": 0.09, + "grad_norm": 12.721230754689678, + "learning_rate": 4.5833333333333327e-07, + "logits/generated": -2.316429615020752, + "logits/oppo_generated": -2.86560320854187, + "logits/oppo_real": -2.7520575523376465, + "logits/real": -2.474339008331299, + "logps/generated": -267.144287109375, + "logps/oppo_gen": -70.71253204345703, + "logps/oppo_real": -288.21905517578125, + "logps/real": -453.522216796875, + "loss": 1.2493, + "loss/gen": 0.4936552047729492, + "loss/real": 0.7759716510772705, + "rewards/accuracies": 0.9375, + "rewards/generated": -196.4317626953125, + "rewards/margins": 31.12860107421875, + "rewards/real": -165.30316162109375, + "step": 44 + }, + { + "epoch": 0.09, + "grad_norm": 13.630179724288533, + "learning_rate": 4.6874999999999996e-07, + "logits/generated": -2.31591796875, + "logits/oppo_generated": -2.6641106605529785, + "logits/oppo_real": -2.687716484069824, + "logits/real": -2.3076515197753906, + "logps/generated": -257.56488037109375, + "logps/oppo_gen": -69.85922241210938, + "logps/oppo_real": -253.47152709960938, + "logps/real": -407.678466796875, + "loss": 1.2267, + "loss/gen": 0.5265874862670898, + "loss/real": 0.7205492258071899, + "rewards/accuracies": 0.9375, + "rewards/generated": -187.70565795898438, + "rewards/margins": 33.498741149902344, + "rewards/real": -154.20692443847656, + "step": 45 + }, + { + "epoch": 0.1, + "grad_norm": 9.876900019322825, + "learning_rate": 4.791666666666667e-07, + "logits/generated": -2.3460116386413574, + "logits/oppo_generated": -2.6993772983551025, + "logits/oppo_real": -2.7424378395080566, + "logits/real": -2.3106417655944824, + "logps/generated": -253.16329956054688, + "logps/oppo_gen": -71.79619598388672, + "logps/oppo_real": -243.79006958007812, + "logps/real": -369.48553466796875, + "loss": 1.1683, + "loss/gen": 0.5725552439689636, + "loss/real": 0.5813912749290466, + "rewards/accuracies": 0.875, + "rewards/generated": -181.36709594726562, + "rewards/margins": 55.67161178588867, + "rewards/real": -125.69548034667969, + "step": 46 + }, + { + "epoch": 0.1, + "grad_norm": 12.87668280113079, + "learning_rate": 4.895833333333333e-07, + "logits/generated": -2.4254989624023438, + "logits/oppo_generated": -2.7546112537384033, + "logits/oppo_real": -2.8327903747558594, + "logits/real": -2.399583339691162, + "logps/generated": -216.5120391845703, + "logps/oppo_gen": -68.76174926757812, + "logps/oppo_real": -289.6665954589844, + "logps/real": -394.17529296875, + "loss": 1.2136, + "loss/gen": 0.7142927646636963, + "loss/real": 0.4956602156162262, + "rewards/accuracies": 1.0, + "rewards/generated": -147.7502899169922, + "rewards/margins": 43.241600036621094, + "rewards/real": -104.50868225097656, + "step": 47 + }, + { + "epoch": 0.1, + "grad_norm": 12.230479978436238, + "learning_rate": 5e-07, + "logits/generated": -2.3337907791137695, + "logits/oppo_generated": -2.5967700481414795, + "logits/oppo_real": -2.6954846382141113, + "logits/real": -2.262849807739258, + "logps/generated": -232.96676635742188, + "logps/oppo_gen": -75.533935546875, + "logps/oppo_real": -281.36871337890625, + "logps/real": -386.46337890625, + "loss": 1.183, + "loss/gen": 0.6721818447113037, + "loss/real": 0.49635791778564453, + "rewards/accuracies": 0.9375, + "rewards/generated": -157.43283081054688, + "rewards/margins": 52.33815002441406, + "rewards/real": -105.09467315673828, + "step": 48 + }, + { + "epoch": 0.1, + "grad_norm": 10.696963754698617, + "learning_rate": 4.999933277714308e-07, + "logits/generated": -2.2673449516296387, + "logits/oppo_generated": -2.512704849243164, + "logits/oppo_real": -2.652205467224121, + "logits/real": -2.222139596939087, + "logps/generated": -239.5457763671875, + "logps/oppo_gen": -80.6132583618164, + "logps/oppo_real": -411.4334411621094, + "logps/real": -531.0726318359375, + "loss": 1.1722, + "loss/gen": 0.6539755463600159, + "loss/real": 0.5593386888504028, + "rewards/accuracies": 0.9375, + "rewards/generated": -158.93252563476562, + "rewards/margins": 39.29335021972656, + "rewards/real": -119.63916778564453, + "step": 49 + }, + { + "epoch": 0.1, + "grad_norm": 12.920776172861276, + "learning_rate": 4.999733114418725e-07, + "logits/generated": -2.5660576820373535, + "logits/oppo_generated": -2.7457919120788574, + "logits/oppo_real": -2.8892219066619873, + "logits/real": -2.426638126373291, + "logps/generated": -282.44354248046875, + "logps/oppo_gen": -84.30994415283203, + "logps/oppo_real": -332.35064697265625, + "logps/real": -460.3952331542969, + "loss": 1.151, + "loss/gen": 0.4987773597240448, + "loss/real": 0.6023204326629639, + "rewards/accuracies": 0.9375, + "rewards/generated": -198.13360595703125, + "rewards/margins": 70.08900451660156, + "rewards/real": -128.04458618164062, + "step": 50 + }, + { + "epoch": 0.11, + "grad_norm": 11.308270248503243, + "learning_rate": 4.999399520797532e-07, + "logits/generated": -2.2724039554595947, + "logits/oppo_generated": -2.718198776245117, + "logits/oppo_real": -2.615701675415039, + "logits/real": -2.3728127479553223, + "logps/generated": -255.1387939453125, + "logps/oppo_gen": -69.3196792602539, + "logps/oppo_real": -302.4152526855469, + "logps/real": -413.5482177734375, + "loss": 1.1335, + "loss/gen": 0.5399003028869629, + "loss/real": 0.5259116888046265, + "rewards/accuracies": 1.0, + "rewards/generated": -185.81912231445312, + "rewards/margins": 74.68618774414062, + "rewards/real": -111.1329345703125, + "step": 51 + }, + { + "epoch": 0.11, + "grad_norm": 12.176337661733065, + "learning_rate": 4.998932514657231e-07, + "logits/generated": -2.3679566383361816, + "logits/oppo_generated": -2.7421092987060547, + "logits/oppo_real": -2.7155723571777344, + "logits/real": -2.4217591285705566, + "logps/generated": -250.55404663085938, + "logps/oppo_gen": -69.50028228759766, + "logps/oppo_real": -243.26260375976562, + "logps/real": -365.6922607421875, + "loss": 1.137, + "loss/gen": 0.5619360208511353, + "loss/real": 0.57623291015625, + "rewards/accuracies": 0.875, + "rewards/generated": -181.05377197265625, + "rewards/margins": 58.624122619628906, + "rewards/real": -122.42965698242188, + "step": 52 + }, + { + "epoch": 0.11, + "grad_norm": 10.84681504435497, + "learning_rate": 4.998332120925598e-07, + "logits/generated": -2.3570823669433594, + "logits/oppo_generated": -2.680886745452881, + "logits/oppo_real": -2.738534688949585, + "logits/real": -2.3443045616149902, + "logps/generated": -245.57504272460938, + "logps/oppo_gen": -61.518577575683594, + "logps/oppo_real": -214.97161865234375, + "logps/real": -337.97381591796875, + "loss": 1.1329, + "loss/gen": 0.5838844776153564, + "loss/real": 0.5817942023277283, + "rewards/accuracies": 0.9375, + "rewards/generated": -184.05645751953125, + "rewards/margins": 61.05426788330078, + "rewards/real": -123.002197265625, + "step": 53 + }, + { + "epoch": 0.11, + "grad_norm": 9.795093818593013, + "learning_rate": 4.997598371650346e-07, + "logits/generated": -2.291536808013916, + "logits/oppo_generated": -2.534092903137207, + "logits/oppo_real": -2.636704921722412, + "logits/real": -2.234243869781494, + "logps/generated": -295.8362121582031, + "logps/oppo_gen": -95.54362487792969, + "logps/oppo_real": -232.2601318359375, + "logps/real": -351.40863037109375, + "loss": 1.0871, + "loss/gen": 0.543353796005249, + "loss/real": 0.5595182776451111, + "rewards/accuracies": 1.0, + "rewards/generated": -200.2926025390625, + "rewards/margins": 81.14410400390625, + "rewards/real": -119.14847564697266, + "step": 54 + }, + { + "epoch": 0.12, + "grad_norm": 11.960172044416053, + "learning_rate": 4.996731305997416e-07, + "logits/generated": -2.40325665473938, + "logits/oppo_generated": -2.7552638053894043, + "logits/oppo_real": -2.7744545936584473, + "logits/real": -2.4595704078674316, + "logps/generated": -263.9963073730469, + "logps/oppo_gen": -73.20036315917969, + "logps/oppo_real": -265.6083679199219, + "logps/real": -375.6069641113281, + "loss": 1.0676, + "loss/gen": 0.5453953742980957, + "loss/real": 0.5264946222305298, + "rewards/accuracies": 0.875, + "rewards/generated": -190.79592895507812, + "rewards/margins": 80.79731750488281, + "rewards/real": -109.99860382080078, + "step": 55 + }, + { + "epoch": 0.12, + "grad_norm": 14.116545991408064, + "learning_rate": 4.995730970248893e-07, + "logits/generated": -2.3119473457336426, + "logits/oppo_generated": -2.819808006286621, + "logits/oppo_real": -2.7743167877197266, + "logits/real": -2.46488094329834, + "logps/generated": -294.59149169921875, + "logps/oppo_gen": -84.80085754394531, + "logps/oppo_real": -403.3960266113281, + "logps/real": -471.69610595703125, + "loss": 1.0333, + "loss/gen": 0.48806941509246826, + "loss/real": 0.38371366262435913, + "rewards/accuracies": 1.0, + "rewards/generated": -209.79061889648438, + "rewards/margins": 141.4905548095703, + "rewards/real": -68.30006408691406, + "step": 56 + }, + { + "epoch": 0.12, + "grad_norm": 11.833621885694749, + "learning_rate": 4.994597417800523e-07, + "logits/generated": -2.383392572402954, + "logits/oppo_generated": -2.7924365997314453, + "logits/oppo_real": -2.751426935195923, + "logits/real": -2.4370484352111816, + "logps/generated": -281.76629638671875, + "logps/oppo_gen": -79.0185775756836, + "logps/oppo_real": -225.24346923828125, + "logps/real": -324.58514404296875, + "loss": 1.035, + "loss/gen": 0.48555830121040344, + "loss/real": 0.48214584589004517, + "rewards/accuracies": 1.0, + "rewards/generated": -202.74771118164062, + "rewards/margins": 103.40604400634766, + "rewards/real": -99.34165954589844, + "step": 57 + }, + { + "epoch": 0.12, + "grad_norm": 10.633234759823537, + "learning_rate": 4.993330709158879e-07, + "logits/generated": -2.333850145339966, + "logits/oppo_generated": -2.8365046977996826, + "logits/oppo_real": -2.7985291481018066, + "logits/real": -2.4481301307678223, + "logps/generated": -275.4985046386719, + "logps/oppo_gen": -82.01077270507812, + "logps/oppo_real": -335.5836486816406, + "logps/real": -424.78973388671875, + "loss": 0.992, + "loss/gen": 0.5070739388465881, + "loss/real": 0.4416646659374237, + "rewards/accuracies": 1.0, + "rewards/generated": -193.4877471923828, + "rewards/margins": 104.28164672851562, + "rewards/real": -89.20610046386719, + "step": 58 + }, + { + "epoch": 0.12, + "grad_norm": 10.303258835881012, + "learning_rate": 4.991930911938115e-07, + "logits/generated": -2.3403124809265137, + "logits/oppo_generated": -2.5403761863708496, + "logits/oppo_real": -2.8262884616851807, + "logits/real": -2.159945487976074, + "logps/generated": -257.6687316894531, + "logps/oppo_gen": -65.96223449707031, + "logps/oppo_real": -207.23001098632812, + "logps/real": -311.77996826171875, + "loss": 0.9855, + "loss/gen": 0.5228403806686401, + "loss/real": 0.5156495571136475, + "rewards/accuracies": 0.9375, + "rewards/generated": -191.70651245117188, + "rewards/margins": 87.15653991699219, + "rewards/real": -104.54997253417969, + "step": 59 + }, + { + "epoch": 0.13, + "grad_norm": 10.776866961748691, + "learning_rate": 4.990398100856366e-07, + "logits/generated": -2.2386527061462402, + "logits/oppo_generated": -2.676912307739258, + "logits/oppo_real": -2.7533621788024902, + "logits/real": -2.286830186843872, + "logps/generated": -357.25042724609375, + "logps/oppo_gen": -91.29155731201172, + "logps/oppo_real": -219.24154663085938, + "logps/real": -348.81939697265625, + "loss": 0.9867, + "loss/gen": 0.3898318409919739, + "loss/real": 0.6121199727058411, + "rewards/accuracies": 1.0, + "rewards/generated": -265.9588623046875, + "rewards/margins": 136.38104248046875, + "rewards/real": -129.57781982421875, + "step": 60 + }, + { + "epoch": 0.13, + "grad_norm": 11.787518278170479, + "learning_rate": 4.988732357731762e-07, + "logits/generated": -2.2177071571350098, + "logits/oppo_generated": -2.8242249488830566, + "logits/oppo_real": -2.6713385581970215, + "logits/real": -2.442739963531494, + "logps/generated": -307.24603271484375, + "logps/oppo_gen": -86.80853271484375, + "logps/oppo_real": -296.34722900390625, + "logps/real": -410.3054504394531, + "loss": 0.9475, + "loss/gen": 0.4432734251022339, + "loss/real": 0.5487948656082153, + "rewards/accuracies": 1.0, + "rewards/generated": -220.4375, + "rewards/margins": 106.47928619384766, + "rewards/real": -113.95822143554688, + "step": 61 + }, + { + "epoch": 0.13, + "grad_norm": 10.84077487554943, + "learning_rate": 4.986933771478051e-07, + "logits/generated": -2.376708984375, + "logits/oppo_generated": -2.7960495948791504, + "logits/oppo_real": -2.914412260055542, + "logits/real": -2.3897695541381836, + "logps/generated": -300.8831787109375, + "logps/oppo_gen": -86.83887481689453, + "logps/oppo_real": -453.4812927246094, + "logps/real": -554.751953125, + "loss": 0.9287, + "loss/gen": 0.4425833523273468, + "loss/real": 0.5028396248817444, + "rewards/accuracies": 0.875, + "rewards/generated": -214.0443115234375, + "rewards/margins": 112.77363586425781, + "rewards/real": -101.27067565917969, + "step": 62 + }, + { + "epoch": 0.13, + "grad_norm": 11.885957501092347, + "learning_rate": 4.985002438099865e-07, + "logits/generated": -2.196812629699707, + "logits/oppo_generated": -2.7576608657836914, + "logits/oppo_real": -2.674570083618164, + "logits/real": -2.3408308029174805, + "logps/generated": -318.3380126953125, + "logps/oppo_gen": -76.6629409790039, + "logps/oppo_real": -243.74818420410156, + "logps/real": -356.16204833984375, + "loss": 0.9311, + "loss/gen": 0.3887555003166199, + "loss/real": 0.5405441522598267, + "rewards/accuracies": 0.9375, + "rewards/generated": -241.6750946044922, + "rewards/margins": 129.26123046875, + "rewards/real": -112.41386413574219, + "step": 63 + }, + { + "epoch": 0.13, + "grad_norm": 19.176210542785817, + "learning_rate": 4.982938460687582e-07, + "logits/generated": -2.14847469329834, + "logits/oppo_generated": -2.658493995666504, + "logits/oppo_real": -2.693326950073242, + "logits/real": -2.232679843902588, + "logps/generated": -289.572509765625, + "logps/oppo_gen": -74.29948425292969, + "logps/oppo_real": -365.4249267578125, + "logps/real": -468.91778564453125, + "loss": 1.0004, + "loss/gen": 0.4416448771953583, + "loss/real": 0.5346858501434326, + "rewards/accuracies": 0.9375, + "rewards/generated": -215.27301025390625, + "rewards/margins": 111.78018188476562, + "rewards/real": -103.4928207397461, + "step": 64 + }, + { + "epoch": 0.14, + "grad_norm": 12.604560850090447, + "learning_rate": 4.980741949411839e-07, + "logits/generated": -2.200733184814453, + "logits/oppo_generated": -2.8042337894439697, + "logits/oppo_real": -2.743964195251465, + "logits/real": -2.2919199466705322, + "logps/generated": -308.5362548828125, + "logps/oppo_gen": -76.34394073486328, + "logps/oppo_real": -196.71514892578125, + "logps/real": -299.24163818359375, + "loss": 0.9107, + "loss/gen": 0.439577579498291, + "loss/real": 0.5086124539375305, + "rewards/accuracies": 1.0, + "rewards/generated": -232.19229125976562, + "rewards/margins": 129.66580200195312, + "rewards/real": -102.5264892578125, + "step": 65 + }, + { + "epoch": 0.14, + "grad_norm": 14.953938779939541, + "learning_rate": 4.978413021517633e-07, + "logits/generated": -2.2938356399536133, + "logits/oppo_generated": -2.7946197986602783, + "logits/oppo_real": -2.7596311569213867, + "logits/real": -2.365877389907837, + "logps/generated": -342.2618713378906, + "logps/oppo_gen": -103.196044921875, + "logps/oppo_real": -267.36993408203125, + "logps/real": -376.911376953125, + "loss": 0.892, + "loss/gen": 0.3898586630821228, + "loss/real": 0.5500133037567139, + "rewards/accuracies": 0.9375, + "rewards/generated": -239.06582641601562, + "rewards/margins": 129.5243682861328, + "rewards/real": -109.54145812988281, + "step": 66 + }, + { + "epoch": 0.14, + "grad_norm": 20.73778569701294, + "learning_rate": 4.975951801318083e-07, + "logits/generated": -2.0994839668273926, + "logits/oppo_generated": -2.7236528396606445, + "logits/oppo_real": -2.8300952911376953, + "logits/real": -2.186398506164551, + "logps/generated": -274.72259521484375, + "logps/oppo_gen": -66.93666076660156, + "logps/oppo_real": -370.18499755859375, + "logps/real": -475.4491882324219, + "loss": 0.8971, + "loss/gen": 0.4556940495967865, + "loss/real": 0.5313321948051453, + "rewards/accuracies": 0.9375, + "rewards/generated": -207.78594970703125, + "rewards/margins": 102.52177429199219, + "rewards/real": -105.26417541503906, + "step": 67 + }, + { + "epoch": 0.14, + "grad_norm": 17.5792161119695, + "learning_rate": 4.973358420187775e-07, + "logits/generated": -2.248991012573242, + "logits/oppo_generated": -2.843545436859131, + "logits/oppo_real": -2.811431407928467, + "logits/real": -2.3350868225097656, + "logps/generated": -321.4705505371094, + "logps/oppo_gen": -81.84077453613281, + "logps/oppo_real": -353.8198547363281, + "logps/real": -448.204345703125, + "loss": 0.8387, + "loss/gen": 0.3652092218399048, + "loss/real": 0.4975828528404236, + "rewards/accuracies": 0.9375, + "rewards/generated": -239.62977600097656, + "rewards/margins": 145.24526977539062, + "rewards/real": -94.384521484375, + "step": 68 + }, + { + "epoch": 0.14, + "grad_norm": 34.968975158457425, + "learning_rate": 4.970633016555764e-07, + "logits/generated": -2.250572681427002, + "logits/oppo_generated": -2.743389368057251, + "logits/oppo_real": -2.8539438247680664, + "logits/real": -2.277600049972534, + "logps/generated": -332.234375, + "logps/oppo_gen": -81.96417236328125, + "logps/oppo_real": -289.67822265625, + "logps/real": -347.64324951171875, + "loss": 0.8749, + "loss/gen": 0.4391787052154541, + "loss/real": 0.34725236892700195, + "rewards/accuracies": 1.0, + "rewards/generated": -250.27023315429688, + "rewards/margins": 192.30517578125, + "rewards/real": -57.96504211425781, + "step": 69 + }, + { + "epoch": 0.15, + "grad_norm": 21.457009560452356, + "learning_rate": 4.967775735898179e-07, + "logits/generated": -2.1518430709838867, + "logits/oppo_generated": -2.724855422973633, + "logits/oppo_real": -2.7126691341400146, + "logits/real": -2.200214147567749, + "logps/generated": -317.98663330078125, + "logps/oppo_gen": -72.91289520263672, + "logps/oppo_real": -194.3795623779297, + "logps/real": -256.5523681640625, + "loss": 0.829, + "loss/gen": 0.38895365595817566, + "loss/real": 0.3911527395248413, + "rewards/accuracies": 1.0, + "rewards/generated": -245.07374572753906, + "rewards/margins": 182.90090942382812, + "rewards/real": -62.17283630371094, + "step": 70 + }, + { + "epoch": 0.15, + "grad_norm": 16.85674377781829, + "learning_rate": 4.964786730730454e-07, + "logits/generated": -1.9712395668029785, + "logits/oppo_generated": -2.6243536472320557, + "logits/oppo_real": -2.568087100982666, + "logits/real": -2.1335487365722656, + "logps/generated": -288.0013732910156, + "logps/oppo_gen": -69.08726501464844, + "logps/oppo_real": -247.77418518066406, + "logps/real": -316.950927734375, + "loss": 0.8361, + "loss/gen": 0.42092961072921753, + "loss/real": 0.40566056966781616, + "rewards/accuracies": 0.9375, + "rewards/generated": -218.91412353515625, + "rewards/margins": 149.7374267578125, + "rewards/real": -69.17670440673828, + "step": 71 + }, + { + "epoch": 0.15, + "grad_norm": 18.339407359690973, + "learning_rate": 4.961666160599197e-07, + "logits/generated": -2.207213878631592, + "logits/oppo_generated": -2.8543405532836914, + "logits/oppo_real": -2.865431785583496, + "logits/real": -2.299715757369995, + "logps/generated": -325.899658203125, + "logps/oppo_gen": -78.86032104492188, + "logps/oppo_real": -285.64013671875, + "logps/real": -355.99383544921875, + "loss": 0.8282, + "loss/gen": 0.3996672034263611, + "loss/real": 0.41223788261413574, + "rewards/accuracies": 0.9375, + "rewards/generated": -247.03933715820312, + "rewards/margins": 176.6856689453125, + "rewards/real": -70.35367584228516, + "step": 72 + }, + { + "epoch": 0.15, + "grad_norm": 20.965985316297516, + "learning_rate": 4.958414192073665e-07, + "logits/generated": -2.016292095184326, + "logits/oppo_generated": -2.547126531600952, + "logits/oppo_real": -2.573683738708496, + "logits/real": -2.0639572143554688, + "logps/generated": -343.04656982421875, + "logps/oppo_gen": -78.18771362304688, + "logps/oppo_real": -325.4617614746094, + "logps/real": -398.49114990234375, + "loss": 0.8034, + "loss/gen": 0.39308467507362366, + "loss/real": 0.4334869384765625, + "rewards/accuracies": 1.0, + "rewards/generated": -264.8587951660156, + "rewards/margins": 191.82943725585938, + "rewards/real": -73.02937316894531, + "step": 73 + }, + { + "epoch": 0.15, + "grad_norm": 17.53030039345492, + "learning_rate": 4.955030998736876e-07, + "logits/generated": -2.1453638076782227, + "logits/oppo_generated": -2.717832326889038, + "logits/oppo_real": -2.810462474822998, + "logits/real": -2.1777830123901367, + "logps/generated": -310.4494323730469, + "logps/oppo_gen": -75.6651382446289, + "logps/oppo_real": -284.924072265625, + "logps/real": -360.6187744140625, + "loss": 0.8214, + "loss/gen": 0.37556761503219604, + "loss/real": 0.45106858015060425, + "rewards/accuracies": 1.0, + "rewards/generated": -234.78427124023438, + "rewards/margins": 159.08956909179688, + "rewards/real": -75.69470977783203, + "step": 74 + }, + { + "epoch": 0.16, + "grad_norm": 17.718348691994233, + "learning_rate": 4.951516761176343e-07, + "logits/generated": -2.3389203548431396, + "logits/oppo_generated": -3.063918113708496, + "logits/oppo_real": -3.1160035133361816, + "logits/real": -2.4417004585266113, + "logps/generated": -420.660400390625, + "logps/oppo_gen": -91.83782958984375, + "logps/oppo_real": -399.8033447265625, + "logps/real": -444.0775451660156, + "loss": 0.7504, + "loss/gen": 0.2812493145465851, + "loss/real": 0.33059465885162354, + "rewards/accuracies": 1.0, + "rewards/generated": -328.8226318359375, + "rewards/margins": 284.54840087890625, + "rewards/real": -44.274192810058594, + "step": 75 + }, + { + "epoch": 0.16, + "grad_norm": 17.563640733916134, + "learning_rate": 4.947871666974437e-07, + "logits/generated": -1.8955752849578857, + "logits/oppo_generated": -2.6598410606384277, + "logits/oppo_real": -2.6542224884033203, + "logits/real": -2.035309314727783, + "logps/generated": -412.85662841796875, + "logps/oppo_gen": -67.25988006591797, + "logps/oppo_real": -283.60589599609375, + "logps/real": -359.90924072265625, + "loss": 0.7995, + "loss/gen": 0.4136648178100586, + "loss/real": 0.45442986488342285, + "rewards/accuracies": 0.9375, + "rewards/generated": -345.59674072265625, + "rewards/margins": 269.29339599609375, + "rewards/real": -76.3033447265625, + "step": 76 + }, + { + "epoch": 0.16, + "grad_norm": 21.040827708625496, + "learning_rate": 4.944095910698372e-07, + "logits/generated": -2.041642189025879, + "logits/oppo_generated": -2.7393393516540527, + "logits/oppo_real": -2.783273458480835, + "logits/real": -2.1023669242858887, + "logps/generated": -362.0621643066406, + "logps/oppo_gen": -127.53181457519531, + "logps/oppo_real": -495.255859375, + "logps/real": -547.227294921875, + "loss": 0.754, + "loss/gen": 0.3859551250934601, + "loss/real": 0.3337196409702301, + "rewards/accuracies": 1.0, + "rewards/generated": -234.53038024902344, + "rewards/margins": 182.55897521972656, + "rewards/real": -51.97139358520508, + "step": 77 + }, + { + "epoch": 0.16, + "grad_norm": 18.349379759240588, + "learning_rate": 4.940189693889818e-07, + "logits/generated": -1.6511880159378052, + "logits/oppo_generated": -2.492225408554077, + "logits/oppo_real": -2.408968210220337, + "logits/real": -1.7214398384094238, + "logps/generated": -343.68011474609375, + "logps/oppo_gen": -74.83660888671875, + "logps/oppo_real": -254.16744995117188, + "logps/real": -314.009521484375, + "loss": 0.7448, + "loss/gen": 0.28272247314453125, + "loss/real": 0.38186439871788025, + "rewards/accuracies": 1.0, + "rewards/generated": -268.843505859375, + "rewards/margins": 209.00146484375, + "rewards/real": -59.842044830322266, + "step": 78 + }, + { + "epoch": 0.17, + "grad_norm": 32.05865062784341, + "learning_rate": 4.936153225054146e-07, + "logits/generated": -1.8036625385284424, + "logits/oppo_generated": -2.690124988555908, + "logits/oppo_real": -2.757858991622925, + "logits/real": -1.9343822002410889, + "logps/generated": -343.80975341796875, + "logps/oppo_gen": -73.35165405273438, + "logps/oppo_real": -301.7453918457031, + "logps/real": -369.839599609375, + "loss": 0.7582, + "loss/gen": 0.2956180274486542, + "loss/real": 0.4004019796848297, + "rewards/accuracies": 1.0, + "rewards/generated": -270.45806884765625, + "rewards/margins": 202.3638458251953, + "rewards/real": -68.09424591064453, + "step": 79 + }, + { + "epoch": 0.17, + "grad_norm": 15.991292491442804, + "learning_rate": 4.931986719649298e-07, + "logits/generated": -1.7580184936523438, + "logits/oppo_generated": -2.8001205921173096, + "logits/oppo_real": -2.768902540206909, + "logits/real": -1.9494829177856445, + "logps/generated": -286.70635986328125, + "logps/oppo_gen": -60.519004821777344, + "logps/oppo_real": -291.74224853515625, + "logps/real": -375.76318359375, + "loss": 0.7548, + "loss/gen": 0.430566668510437, + "loss/real": 0.5071053504943848, + "rewards/accuracies": 0.9375, + "rewards/generated": -226.18736267089844, + "rewards/margins": 142.16641235351562, + "rewards/real": -84.02093505859375, + "step": 80 + }, + { + "epoch": 0.17, + "grad_norm": 35.05248053421558, + "learning_rate": 4.927690400074286e-07, + "logits/generated": -1.9403841495513916, + "logits/oppo_generated": -2.788212776184082, + "logits/oppo_real": -2.712280750274658, + "logits/real": -2.0578057765960693, + "logps/generated": -382.52056884765625, + "logps/oppo_gen": -89.67400360107422, + "logps/oppo_real": -264.5151672363281, + "logps/real": -327.13836669921875, + "loss": 0.8402, + "loss/gen": 0.41186997294425964, + "loss/real": 0.36961644887924194, + "rewards/accuracies": 0.9375, + "rewards/generated": -292.8465576171875, + "rewards/margins": 230.22337341308594, + "rewards/real": -62.62320327758789, + "step": 81 + }, + { + "epoch": 0.17, + "grad_norm": 30.86921322604955, + "learning_rate": 4.923264495657319e-07, + "logits/generated": -1.8421218395233154, + "logits/oppo_generated": -2.6853179931640625, + "logits/oppo_real": -2.8702688217163086, + "logits/real": -1.8912787437438965, + "logps/generated": -324.0174560546875, + "logps/oppo_gen": -78.56639099121094, + "logps/oppo_real": -319.54541015625, + "logps/real": -412.1978759765625, + "loss": 0.8296, + "loss/gen": 0.3521851897239685, + "loss/real": 0.5055232644081116, + "rewards/accuracies": 1.0, + "rewards/generated": -245.4510498046875, + "rewards/margins": 152.798583984375, + "rewards/real": -92.65247344970703, + "step": 82 + }, + { + "epoch": 0.17, + "grad_norm": 20.8992282268632, + "learning_rate": 4.918709242643563e-07, + "logits/generated": -1.789534330368042, + "logits/oppo_generated": -2.82261323928833, + "logits/oppo_real": -2.704563617706299, + "logits/real": -1.9741158485412598, + "logps/generated": -350.99957275390625, + "logps/oppo_gen": -68.2770767211914, + "logps/oppo_real": -239.40855407714844, + "logps/real": -317.4848937988281, + "loss": 0.756, + "loss/gen": 0.3403918147087097, + "loss/real": 0.4584280252456665, + "rewards/accuracies": 0.9375, + "rewards/generated": -282.72247314453125, + "rewards/margins": 204.64617919921875, + "rewards/real": -78.07632446289062, + "step": 83 + }, + { + "epoch": 0.18, + "grad_norm": 25.62434021635362, + "learning_rate": 4.914024884182534e-07, + "logits/generated": -1.859442114830017, + "logits/oppo_generated": -2.74169921875, + "logits/oppo_real": -2.7226579189300537, + "logits/real": -1.9540834426879883, + "logps/generated": -345.9833068847656, + "logps/oppo_gen": -60.66720962524414, + "logps/oppo_real": -155.5846405029297, + "logps/real": -249.47677612304688, + "loss": 0.7579, + "loss/gen": 0.3146398961544037, + "loss/real": 0.5418342351913452, + "rewards/accuracies": 1.0, + "rewards/generated": -285.31610107421875, + "rewards/margins": 191.4239501953125, + "rewards/real": -93.89213562011719, + "step": 84 + }, + { + "epoch": 0.18, + "grad_norm": 30.52967866350504, + "learning_rate": 4.909211670315114e-07, + "logits/generated": -1.861513376235962, + "logits/oppo_generated": -2.698122501373291, + "logits/oppo_real": -2.8987860679626465, + "logits/real": -1.883429765701294, + "logps/generated": -399.5763244628906, + "logps/oppo_gen": -75.33815002441406, + "logps/oppo_real": -307.6024169921875, + "logps/real": -379.11126708984375, + "loss": 0.7771, + "loss/gen": 0.29335951805114746, + "loss/real": 0.470558226108551, + "rewards/accuracies": 0.9375, + "rewards/generated": -324.2381591796875, + "rewards/margins": 252.7292938232422, + "rewards/real": -71.50885772705078, + "step": 85 + }, + { + "epoch": 0.18, + "grad_norm": 25.632891077294982, + "learning_rate": 4.904269857960208e-07, + "logits/generated": -1.8649256229400635, + "logits/oppo_generated": -2.7086257934570312, + "logits/oppo_real": -2.7736144065856934, + "logits/real": -1.9235105514526367, + "logps/generated": -334.47784423828125, + "logps/oppo_gen": -64.02923583984375, + "logps/oppo_real": -219.5755157470703, + "logps/real": -252.96922302246094, + "loss": 0.7185, + "loss/gen": 0.3252614736557007, + "loss/real": 0.29650163650512695, + "rewards/accuracies": 1.0, + "rewards/generated": -270.4486083984375, + "rewards/margins": 237.05490112304688, + "rewards/real": -33.393707275390625, + "step": 86 + }, + { + "epoch": 0.18, + "grad_norm": 23.153695772590766, + "learning_rate": 4.899199710901028e-07, + "logits/generated": -1.6987268924713135, + "logits/oppo_generated": -2.6510324478149414, + "logits/oppo_real": -2.6278481483459473, + "logits/real": -1.944386601448059, + "logps/generated": -371.7005615234375, + "logps/oppo_gen": -90.28435516357422, + "logps/oppo_real": -405.62408447265625, + "logps/real": -452.80035400390625, + "loss": 0.7195, + "loss/gen": 0.3139500916004181, + "loss/real": 0.33689984679222107, + "rewards/accuracies": 0.9375, + "rewards/generated": -281.41619873046875, + "rewards/margins": 234.23989868164062, + "rewards/real": -47.17626190185547, + "step": 87 + }, + { + "epoch": 0.18, + "grad_norm": 29.933416099971993, + "learning_rate": 4.894001499771015e-07, + "logits/generated": -1.8620039224624634, + "logits/oppo_generated": -2.7284958362579346, + "logits/oppo_real": -2.8318753242492676, + "logits/real": -1.9367091655731201, + "logps/generated": -318.0210266113281, + "logps/oppo_gen": -71.02044677734375, + "logps/oppo_real": -323.9693603515625, + "logps/real": -344.6488037109375, + "loss": 0.7011, + "loss/gen": 0.34499967098236084, + "loss/real": 0.25943779945373535, + "rewards/accuracies": 1.0, + "rewards/generated": -247.00057983398438, + "rewards/margins": 226.32110595703125, + "rewards/real": -20.679473876953125, + "step": 88 + }, + { + "epoch": 0.19, + "grad_norm": 30.951742696923585, + "learning_rate": 4.888675502039391e-07, + "logits/generated": -1.5965911149978638, + "logits/oppo_generated": -2.762685537338257, + "logits/oppo_real": -2.5824837684631348, + "logits/real": -1.8686351776123047, + "logps/generated": -281.6763000488281, + "logps/oppo_gen": -63.627159118652344, + "logps/oppo_real": -235.63560485839844, + "logps/real": -290.3897705078125, + "loss": 0.7559, + "loss/gen": 0.48157230019569397, + "loss/real": 0.40050894021987915, + "rewards/accuracies": 0.9375, + "rewards/generated": -218.0491180419922, + "rewards/margins": 163.29498291015625, + "rewards/real": -54.75415802001953, + "step": 89 + }, + { + "epoch": 0.19, + "grad_norm": 24.58686594608555, + "learning_rate": 4.883222001996351e-07, + "logits/generated": -1.3386802673339844, + "logits/oppo_generated": -2.5235419273376465, + "logits/oppo_real": -2.3336281776428223, + "logits/real": -1.5898027420043945, + "logps/generated": -390.20355224609375, + "logps/oppo_gen": -76.78201293945312, + "logps/oppo_real": -216.29495239257812, + "logps/real": -296.91888427734375, + "loss": 0.7039, + "loss/gen": 0.25537049770355225, + "loss/real": 0.4638826549053192, + "rewards/accuracies": 0.9375, + "rewards/generated": -313.4215393066406, + "rewards/margins": 232.797607421875, + "rewards/real": -80.62393188476562, + "step": 90 + }, + { + "epoch": 0.19, + "grad_norm": 23.052282204670107, + "learning_rate": 4.877641290737883e-07, + "logits/generated": -1.6338614225387573, + "logits/oppo_generated": -2.6937649250030518, + "logits/oppo_real": -2.8629989624023438, + "logits/real": -1.6108810901641846, + "logps/generated": -389.9326171875, + "logps/oppo_gen": -72.10958862304688, + "logps/oppo_real": -217.7257537841797, + "logps/real": -288.9909973144531, + "loss": 0.7187, + "loss/gen": 0.26666751503944397, + "loss/real": 0.4401911199092865, + "rewards/accuracies": 1.0, + "rewards/generated": -317.822998046875, + "rewards/margins": 246.55776977539062, + "rewards/real": -71.26525115966797, + "step": 91 + }, + { + "epoch": 0.19, + "grad_norm": 20.153327670906794, + "learning_rate": 4.871933666150239e-07, + "logits/generated": -1.5877046585083008, + "logits/oppo_generated": -2.700439453125, + "logits/oppo_real": -2.8355603218078613, + "logits/real": -1.638826847076416, + "logps/generated": -360.88323974609375, + "logps/oppo_gen": -71.94976806640625, + "logps/oppo_real": -317.441650390625, + "logps/real": -369.97760009765625, + "loss": 0.7454, + "loss/gen": 0.26692843437194824, + "loss/real": 0.3744610548019409, + "rewards/accuracies": 0.9375, + "rewards/generated": -288.9334716796875, + "rewards/margins": 236.39752197265625, + "rewards/real": -52.53595733642578, + "step": 92 + }, + { + "epoch": 0.19, + "grad_norm": 26.06467902499053, + "learning_rate": 4.866099432894024e-07, + "logits/generated": -1.3850927352905273, + "logits/oppo_generated": -2.61204195022583, + "logits/oppo_real": -2.505739212036133, + "logits/real": -1.6025258302688599, + "logps/generated": -427.29150390625, + "logps/oppo_gen": -96.4445571899414, + "logps/oppo_real": -350.4456787109375, + "logps/real": -403.288818359375, + "loss": 0.7432, + "loss/gen": 0.22171354293823242, + "loss/real": 0.3635830879211426, + "rewards/accuracies": 1.0, + "rewards/generated": -330.846923828125, + "rewards/margins": 278.0038146972656, + "rewards/real": -52.843109130859375, + "step": 93 + }, + { + "epoch": 0.2, + "grad_norm": 38.3231737909853, + "learning_rate": 4.860138902387939e-07, + "logits/generated": -1.5998806953430176, + "logits/oppo_generated": -2.6566057205200195, + "logits/oppo_real": -2.782456874847412, + "logits/real": -1.706424593925476, + "logps/generated": -365.70166015625, + "logps/oppo_gen": -71.42315673828125, + "logps/oppo_real": -228.60372924804688, + "logps/real": -270.2125244140625, + "loss": 0.673, + "loss/gen": 0.3238295912742615, + "loss/real": 0.32134318351745605, + "rewards/accuracies": 0.9375, + "rewards/generated": -294.27850341796875, + "rewards/margins": 252.66970825195312, + "rewards/real": -41.60879135131836, + "step": 94 + }, + { + "epoch": 0.2, + "grad_norm": 14.568084300970918, + "learning_rate": 4.854052392792161e-07, + "logits/generated": -1.7710530757904053, + "logits/oppo_generated": -2.799525499343872, + "logits/oppo_real": -2.799852132797241, + "logits/real": -1.9224238395690918, + "logps/generated": -359.0024108886719, + "logps/oppo_gen": -80.63153076171875, + "logps/oppo_real": -294.978759765625, + "logps/real": -347.17596435546875, + "loss": 0.6688, + "loss/gen": 0.2978823482990265, + "loss/real": 0.34458765387535095, + "rewards/accuracies": 1.0, + "rewards/generated": -278.370849609375, + "rewards/margins": 226.17367553710938, + "rewards/real": -52.19721221923828, + "step": 95 + }, + { + "epoch": 0.2, + "grad_norm": 22.049713838757494, + "learning_rate": 4.847840228991356e-07, + "logits/generated": -1.7129011154174805, + "logits/oppo_generated": -2.7052841186523438, + "logits/oppo_real": -2.9764838218688965, + "logits/real": -1.8225359916687012, + "logps/generated": -321.9461669921875, + "logps/oppo_gen": -64.55047607421875, + "logps/oppo_real": -324.9617004394531, + "logps/real": -359.5166320800781, + "loss": 0.7227, + "loss/gen": 0.34642934799194336, + "loss/real": 0.3209618330001831, + "rewards/accuracies": 0.9375, + "rewards/generated": -257.39569091796875, + "rewards/margins": 222.84072875976562, + "rewards/real": -34.55495834350586, + "step": 96 + }, + { + "epoch": 0.2, + "grad_norm": 22.99331169737191, + "learning_rate": 4.841502742577338e-07, + "logits/generated": -1.6851990222930908, + "logits/oppo_generated": -2.788656234741211, + "logits/oppo_real": -2.713801383972168, + "logits/real": -1.8722307682037354, + "logps/generated": -351.01678466796875, + "logps/oppo_gen": -59.00885009765625, + "logps/oppo_real": -176.6733856201172, + "logps/real": -210.3897705078125, + "loss": 0.6606, + "loss/gen": 0.41348496079444885, + "loss/real": 0.29584217071533203, + "rewards/accuracies": 1.0, + "rewards/generated": -292.0079650878906, + "rewards/margins": 258.2915344238281, + "rewards/real": -33.716373443603516, + "step": 97 + }, + { + "epoch": 0.21, + "grad_norm": 34.593367135979804, + "learning_rate": 4.83504027183137e-07, + "logits/generated": -1.626028060913086, + "logits/oppo_generated": -2.680184841156006, + "logits/oppo_real": -2.639242649078369, + "logits/real": -1.781846523284912, + "logps/generated": -338.901611328125, + "logps/oppo_gen": -63.749298095703125, + "logps/oppo_real": -225.60980224609375, + "logps/real": -232.88375854492188, + "loss": 0.6294, + "loss/gen": 0.3008587062358856, + "loss/real": 0.2209412306547165, + "rewards/accuracies": 1.0, + "rewards/generated": -275.15234375, + "rewards/margins": 267.8783874511719, + "rewards/real": -7.2739667892456055, + "step": 98 + }, + { + "epoch": 0.21, + "grad_norm": 24.448282268073047, + "learning_rate": 4.828453161706108e-07, + "logits/generated": -1.6220470666885376, + "logits/oppo_generated": -2.674605369567871, + "logits/oppo_real": -2.5424935817718506, + "logits/real": -1.8579437732696533, + "logps/generated": -359.24969482421875, + "logps/oppo_gen": -88.02183532714844, + "logps/oppo_real": -287.251953125, + "logps/real": -309.96124267578125, + "loss": 0.6726, + "loss/gen": 0.3974546194076538, + "loss/real": 0.28890979290008545, + "rewards/accuracies": 1.0, + "rewards/generated": -271.22784423828125, + "rewards/margins": 248.51858520507812, + "rewards/real": -22.709251403808594, + "step": 99 + }, + { + "epoch": 0.21, + "grad_norm": 28.265135904765145, + "learning_rate": 4.821741763807186e-07, + "logits/generated": -1.6087634563446045, + "logits/oppo_generated": -2.742450714111328, + "logits/oppo_real": -2.764233350753784, + "logits/real": -1.770212173461914, + "logps/generated": -427.012939453125, + "logps/oppo_gen": -84.33467102050781, + "logps/oppo_real": -317.5994873046875, + "logps/real": -369.8688049316406, + "loss": 0.6712, + "loss/gen": 0.24097959697246552, + "loss/real": 0.36573004722595215, + "rewards/accuracies": 0.9375, + "rewards/generated": -342.6782531738281, + "rewards/margins": 290.408935546875, + "rewards/real": -52.26934051513672, + "step": 100 + }, + { + "epoch": 0.21, + "grad_norm": 31.218570132114476, + "learning_rate": 4.81490643637445e-07, + "logits/generated": -1.654707908630371, + "logits/oppo_generated": -2.7942566871643066, + "logits/oppo_real": -2.9361443519592285, + "logits/real": -1.663029432296753, + "logps/generated": -362.0345458984375, + "logps/oppo_gen": -62.20787048339844, + "logps/oppo_real": -250.55889892578125, + "logps/real": -306.2510681152344, + "loss": 0.6456, + "loss/gen": 0.23637422919273376, + "loss/real": 0.411770224571228, + "rewards/accuracies": 1.0, + "rewards/generated": -299.8266906738281, + "rewards/margins": 244.13455200195312, + "rewards/real": -55.69215393066406, + "step": 101 + }, + { + "epoch": 0.21, + "grad_norm": 18.808293984501145, + "learning_rate": 4.807947544262838e-07, + "logits/generated": -1.5722711086273193, + "logits/oppo_generated": -2.7636303901672363, + "logits/oppo_real": -2.752176284790039, + "logits/real": -1.6461234092712402, + "logps/generated": -319.77374267578125, + "logps/oppo_gen": -56.84593963623047, + "logps/oppo_real": -185.9375457763672, + "logps/real": -256.1296691894531, + "loss": 0.661, + "loss/gen": 0.32895606756210327, + "loss/real": 0.4661652743816376, + "rewards/accuracies": 0.9375, + "rewards/generated": -262.9277648925781, + "rewards/margins": 192.73565673828125, + "rewards/real": -70.19212341308594, + "step": 102 + }, + { + "epoch": 0.22, + "grad_norm": 16.356918952156665, + "learning_rate": 4.800865458922898e-07, + "logits/generated": -1.1754525899887085, + "logits/oppo_generated": -2.619927406311035, + "logits/oppo_real": -2.643867015838623, + "logits/real": -1.3315857648849487, + "logps/generated": -441.89556884765625, + "logps/oppo_gen": -69.87133026123047, + "logps/oppo_real": -264.7545166015625, + "logps/real": -286.8203430175781, + "loss": 0.6036, + "loss/gen": 0.3638154864311218, + "loss/real": 0.2782540023326874, + "rewards/accuracies": 1.0, + "rewards/generated": -372.0242004394531, + "rewards/margins": 349.9583435058594, + "rewards/real": -22.065845489501953, + "step": 103 + }, + { + "epoch": 0.22, + "grad_norm": 33.94130891954338, + "learning_rate": 4.793660558380969e-07, + "logits/generated": -1.2376275062561035, + "logits/oppo_generated": -2.6367125511169434, + "logits/oppo_real": -2.6506056785583496, + "logits/real": -1.3006834983825684, + "logps/generated": -473.9594421386719, + "logps/oppo_gen": -74.07377624511719, + "logps/oppo_real": -268.19134521484375, + "logps/real": -330.2340087890625, + "loss": 0.7282, + "loss/gen": 0.20278650522232056, + "loss/real": 0.44767656922340393, + "rewards/accuracies": 1.0, + "rewards/generated": -399.88568115234375, + "rewards/margins": 337.8430480957031, + "rewards/real": -62.04261779785156, + "step": 104 + }, + { + "epoch": 0.22, + "grad_norm": 41.505814845821604, + "learning_rate": 4.786333227218995e-07, + "logits/generated": -1.4750065803527832, + "logits/oppo_generated": -2.779034376144409, + "logits/oppo_real": -2.8282456398010254, + "logits/real": -1.5095953941345215, + "logps/generated": -399.50592041015625, + "logps/oppo_gen": -69.72903442382812, + "logps/oppo_real": -289.5185546875, + "logps/real": -296.21661376953125, + "loss": 0.5856, + "loss/gen": 0.25725850462913513, + "loss/real": 0.21779251098632812, + "rewards/accuracies": 1.0, + "rewards/generated": -329.77685546875, + "rewards/margins": 323.07879638671875, + "rewards/real": -6.6980767250061035, + "step": 105 + }, + { + "epoch": 0.22, + "grad_norm": 25.561021981262066, + "learning_rate": 4.778883856554003e-07, + "logits/generated": -1.1449687480926514, + "logits/oppo_generated": -2.7481935024261475, + "logits/oppo_real": -2.5869712829589844, + "logits/real": -1.6161012649536133, + "logps/generated": -371.6748352050781, + "logps/oppo_gen": -82.20314025878906, + "logps/oppo_real": -304.43182373046875, + "logps/real": -322.1702880859375, + "loss": 0.5752, + "loss/gen": 0.313020795583725, + "loss/real": 0.24407562613487244, + "rewards/accuracies": 1.0, + "rewards/generated": -289.47174072265625, + "rewards/margins": 271.7332763671875, + "rewards/real": -17.738439559936523, + "step": 106 + }, + { + "epoch": 0.22, + "grad_norm": 32.99757515061239, + "learning_rate": 4.771312844017224e-07, + "logits/generated": -1.1853927373886108, + "logits/oppo_generated": -2.60406494140625, + "logits/oppo_real": -2.690169334411621, + "logits/real": -1.1705992221832275, + "logps/generated": -584.3870849609375, + "logps/oppo_gen": -71.73402404785156, + "logps/oppo_real": -317.8191833496094, + "logps/real": -361.0145263671875, + "loss": 0.6036, + "loss/gen": 0.2634323835372925, + "loss/real": 0.32717156410217285, + "rewards/accuracies": 1.0, + "rewards/generated": -512.653076171875, + "rewards/margins": 469.45770263671875, + "rewards/real": -43.19536209106445, + "step": 107 + }, + { + "epoch": 0.23, + "grad_norm": 27.058304741220393, + "learning_rate": 4.7636205937328664e-07, + "logits/generated": -1.094700813293457, + "logits/oppo_generated": -2.6770029067993164, + "logits/oppo_real": -2.848341703414917, + "logits/real": -1.0369551181793213, + "logps/generated": -383.38958740234375, + "logps/oppo_gen": -69.1045150756836, + "logps/oppo_real": -314.2913818359375, + "logps/real": -380.5789794921875, + "loss": 0.6046, + "loss/gen": 0.2522876262664795, + "loss/real": 0.494897723197937, + "rewards/accuracies": 1.0, + "rewards/generated": -314.28509521484375, + "rewards/margins": 247.99752807617188, + "rewards/real": -66.28755950927734, + "step": 108 + }, + { + "epoch": 0.23, + "grad_norm": 19.27185799288815, + "learning_rate": 4.755807516296547e-07, + "logits/generated": -1.0437504053115845, + "logits/oppo_generated": -2.708833694458008, + "logits/oppo_real": -2.679042339324951, + "logits/real": -1.4015766382217407, + "logps/generated": -431.29931640625, + "logps/oppo_gen": -72.196044921875, + "logps/oppo_real": -315.014404296875, + "logps/real": -354.2579345703125, + "loss": 0.5367, + "loss/gen": 0.19119717180728912, + "loss/real": 0.36264485120773315, + "rewards/accuracies": 0.9375, + "rewards/generated": -359.10321044921875, + "rewards/margins": 319.8597106933594, + "rewards/real": -39.2435302734375, + "step": 109 + }, + { + "epoch": 0.23, + "grad_norm": 25.759154831189385, + "learning_rate": 4.747874028753375e-07, + "logits/generated": -0.6104034185409546, + "logits/oppo_generated": -2.5937318801879883, + "logits/oppo_real": -2.607351303100586, + "logits/real": -0.8924816846847534, + "logps/generated": -444.0633544921875, + "logps/oppo_gen": -83.86407470703125, + "logps/oppo_real": -295.3841857910156, + "logps/real": -332.865966796875, + "loss": 0.5587, + "loss/gen": 0.1575571894645691, + "loss/real": 0.3404185175895691, + "rewards/accuracies": 1.0, + "rewards/generated": -360.19927978515625, + "rewards/margins": 322.71746826171875, + "rewards/real": -37.48179626464844, + "step": 110 + }, + { + "epoch": 0.23, + "grad_norm": 21.85601087073395, + "learning_rate": 4.739820554575686e-07, + "logits/generated": -0.9350783228874207, + "logits/oppo_generated": -2.8147330284118652, + "logits/oppo_real": -2.815829277038574, + "logits/real": -1.2611966133117676, + "logps/generated": -478.8157653808594, + "logps/oppo_gen": -102.38821411132812, + "logps/oppo_real": -428.80462646484375, + "logps/real": -467.30364990234375, + "loss": 0.6008, + "loss/gen": 0.1555887907743454, + "loss/real": 0.3448715806007385, + "rewards/accuracies": 1.0, + "rewards/generated": -376.42755126953125, + "rewards/margins": 337.92852783203125, + "rewards/real": -38.49901580810547, + "step": 111 + }, + { + "epoch": 0.23, + "grad_norm": 30.296640301759844, + "learning_rate": 4.731647523640445e-07, + "logits/generated": -0.2944636642932892, + "logits/oppo_generated": -2.7116103172302246, + "logits/oppo_real": -2.71325421333313, + "logits/real": -0.4511444568634033, + "logps/generated": -401.2589111328125, + "logps/oppo_gen": -63.440895080566406, + "logps/oppo_real": -205.63375854492188, + "logps/real": -252.83216857910156, + "loss": 0.5373, + "loss/gen": 0.2936084270477295, + "loss/real": 0.3831641674041748, + "rewards/accuracies": 0.875, + "rewards/generated": -337.8180236816406, + "rewards/margins": 290.61962890625, + "rewards/real": -47.198387145996094, + "step": 112 + }, + { + "epoch": 0.24, + "grad_norm": 24.15761310635001, + "learning_rate": 4.723355372206297e-07, + "logits/generated": -0.03680907189846039, + "logits/oppo_generated": -2.817870616912842, + "logits/oppo_real": -2.7380404472351074, + "logits/real": -0.25676417350769043, + "logps/generated": -494.8074951171875, + "logps/oppo_gen": -72.92829132080078, + "logps/oppo_real": -280.0667724609375, + "logps/real": -325.4312744140625, + "loss": 0.5344, + "loss/gen": 0.15776054561138153, + "loss/real": 0.35052889585494995, + "rewards/accuracies": 1.0, + "rewards/generated": -421.8791809082031, + "rewards/margins": 376.51470947265625, + "rewards/real": -45.3645133972168, + "step": 113 + }, + { + "epoch": 0.24, + "grad_norm": 24.451979306227695, + "learning_rate": 4.714944542890278e-07, + "logits/generated": -0.45144331455230713, + "logits/oppo_generated": -2.7973763942718506, + "logits/oppo_real": -2.9100446701049805, + "logits/real": 0.06648720800876617, + "logps/generated": -474.79876708984375, + "logps/oppo_gen": -72.39361572265625, + "logps/oppo_real": -261.4820556640625, + "logps/real": -300.52618408203125, + "loss": 0.5192, + "loss/gen": 0.2735764980316162, + "loss/real": 0.327633798122406, + "rewards/accuracies": 1.0, + "rewards/generated": -402.40509033203125, + "rewards/margins": 363.3609619140625, + "rewards/real": -39.04413604736328, + "step": 114 + }, + { + "epoch": 0.24, + "grad_norm": 28.39276507997625, + "learning_rate": 4.706415484644195e-07, + "logits/generated": 0.07198745012283325, + "logits/oppo_generated": -2.7207179069519043, + "logits/oppo_real": -2.880669116973877, + "logits/real": 0.16586969792842865, + "logps/generated": -494.2124328613281, + "logps/oppo_gen": -71.45054626464844, + "logps/oppo_real": -325.3066711425781, + "logps/real": -359.72027587890625, + "loss": 0.5301, + "loss/gen": 0.1617964804172516, + "loss/real": 0.3581461012363434, + "rewards/accuracies": 1.0, + "rewards/generated": -422.7618713378906, + "rewards/margins": 388.34832763671875, + "rewards/real": -34.413551330566406, + "step": 115 + }, + { + "epoch": 0.24, + "grad_norm": 34.45851538047689, + "learning_rate": 4.6977686527306555e-07, + "logits/generated": 0.7090212106704712, + "logits/oppo_generated": -2.6319010257720947, + "logits/oppo_real": -2.643490791320801, + "logits/real": 0.8649207949638367, + "logps/generated": -447.24755859375, + "logps/oppo_gen": -73.28824615478516, + "logps/oppo_real": -283.16064453125, + "logps/real": -304.78131103515625, + "loss": 0.5395, + "loss/gen": 0.17251688241958618, + "loss/real": 0.28544631600379944, + "rewards/accuracies": 1.0, + "rewards/generated": -373.9593505859375, + "rewards/margins": 352.3387145996094, + "rewards/real": -21.620647430419922, + "step": 116 + }, + { + "epoch": 0.24, + "grad_norm": 43.58818988959696, + "learning_rate": 4.6890045086987707e-07, + "logits/generated": 1.441139578819275, + "logits/oppo_generated": -2.66965389251709, + "logits/oppo_real": -2.664613962173462, + "logits/real": 1.5334069728851318, + "logps/generated": -609.1144409179688, + "logps/oppo_gen": -70.15062713623047, + "logps/oppo_real": -289.0845947265625, + "logps/real": -356.73895263671875, + "loss": 0.6133, + "loss/gen": 0.12631765007972717, + "loss/real": 0.5410544872283936, + "rewards/accuracies": 1.0, + "rewards/generated": -538.9638061523438, + "rewards/margins": 471.3094177246094, + "rewards/real": -67.65434265136719, + "step": 117 + }, + { + "epoch": 0.25, + "grad_norm": 43.39746496997621, + "learning_rate": 4.680123520359519e-07, + "logits/generated": 1.1541767120361328, + "logits/oppo_generated": -2.7411558628082275, + "logits/oppo_real": -2.8276548385620117, + "logits/real": 1.5257081985473633, + "logps/generated": -463.33917236328125, + "logps/oppo_gen": -79.05010986328125, + "logps/oppo_real": -260.1556396484375, + "logps/real": -341.10089111328125, + "loss": 0.5586, + "loss/gen": 0.11574704945087433, + "loss/real": 0.5906919240951538, + "rewards/accuracies": 1.0, + "rewards/generated": -384.2890625, + "rewards/margins": 303.34381103515625, + "rewards/real": -80.94526672363281, + "step": 118 + }, + { + "epoch": 0.25, + "grad_norm": 41.35345986224279, + "learning_rate": 4.671126161760772e-07, + "logits/generated": 0.8925095796585083, + "logits/oppo_generated": -2.8381776809692383, + "logits/oppo_real": -2.895987033843994, + "logits/real": 0.3291308879852295, + "logps/generated": -404.5374755859375, + "logps/oppo_gen": -75.62970733642578, + "logps/oppo_real": -341.94390869140625, + "logps/real": -355.20977783203125, + "loss": 0.5461, + "loss/gen": 0.30142074823379517, + "loss/real": 0.24460729956626892, + "rewards/accuracies": 1.0, + "rewards/generated": -328.90777587890625, + "rewards/margins": 315.6418762207031, + "rewards/real": -13.265907287597656, + "step": 119 + }, + { + "epoch": 0.25, + "grad_norm": 49.230028469674885, + "learning_rate": 4.662012913161997e-07, + "logits/generated": 1.0944744348526, + "logits/oppo_generated": -2.8429150581359863, + "logits/oppo_real": -2.720982313156128, + "logits/real": -0.4009183645248413, + "logps/generated": -391.66265869140625, + "logps/oppo_gen": -83.18344116210938, + "logps/oppo_real": -297.22320556640625, + "logps/real": -295.4556884765625, + "loss": 0.5535, + "loss/gen": 0.34921205043792725, + "loss/real": 0.20909735560417175, + "rewards/accuracies": 1.0, + "rewards/generated": -308.47918701171875, + "rewards/margins": 310.2467041015625, + "rewards/real": 1.7674973011016846, + "step": 120 + }, + { + "epoch": 0.25, + "grad_norm": 42.11695635602497, + "learning_rate": 4.6527842610086124e-07, + "logits/generated": 2.4489541053771973, + "logits/oppo_generated": -2.769073009490967, + "logits/oppo_real": -2.7379775047302246, + "logits/real": 2.0011355876922607, + "logps/generated": -365.7623291015625, + "logps/oppo_gen": -63.66206359863281, + "logps/oppo_real": -185.47964477539062, + "logps/real": -223.77847290039062, + "loss": 0.5198, + "loss/gen": 0.2616707682609558, + "loss/real": 0.3866133689880371, + "rewards/accuracies": 0.875, + "rewards/generated": -302.1002502441406, + "rewards/margins": 263.8013916015625, + "rewards/real": -38.298851013183594, + "step": 121 + }, + { + "epoch": 0.26, + "grad_norm": 80.59431041391042, + "learning_rate": 4.6434406979060327e-07, + "logits/generated": 1.4197285175323486, + "logits/oppo_generated": -2.808683395385742, + "logits/oppo_real": -2.830435276031494, + "logits/real": 1.4447864294052124, + "logps/generated": -570.8482666015625, + "logps/oppo_gen": -82.84730529785156, + "logps/oppo_real": -266.9815979003906, + "logps/real": -341.551025390625, + "loss": 0.5194, + "loss/gen": 0.08523067831993103, + "loss/real": 0.5628464221954346, + "rewards/accuracies": 1.0, + "rewards/generated": -488.0009765625, + "rewards/margins": 413.4315490722656, + "rewards/real": -74.56942749023438, + "step": 122 + }, + { + "epoch": 0.26, + "grad_norm": 30.311157951927846, + "learning_rate": 4.6339827225933657e-07, + "logits/generated": 2.5612378120422363, + "logits/oppo_generated": -2.7827868461608887, + "logits/oppo_real": -2.7702150344848633, + "logits/real": 0.973285436630249, + "logps/generated": -453.61419677734375, + "logps/oppo_gen": -78.0826416015625, + "logps/oppo_real": -209.60366821289062, + "logps/real": -243.14308166503906, + "loss": 0.4662, + "loss/gen": 0.17920607328414917, + "loss/real": 0.3000218868255615, + "rewards/accuracies": 0.9375, + "rewards/generated": -375.53155517578125, + "rewards/margins": 341.99212646484375, + "rewards/real": -33.53944778442383, + "step": 123 + }, + { + "epoch": 0.26, + "grad_norm": 31.34001878339817, + "learning_rate": 4.6244108399167977e-07, + "logits/generated": 0.48759734630584717, + "logits/oppo_generated": -2.81278133392334, + "logits/oppo_real": -3.0445303916931152, + "logits/real": 0.41890883445739746, + "logps/generated": -512.637939453125, + "logps/oppo_gen": -97.97140502929688, + "logps/oppo_real": -318.54241943359375, + "logps/real": -327.8448791503906, + "loss": 0.4723, + "loss/gen": 0.11405383050441742, + "loss/real": 0.24334505200386047, + "rewards/accuracies": 1.0, + "rewards/generated": -414.66650390625, + "rewards/margins": 405.3641052246094, + "rewards/real": -9.302425384521484, + "step": 124 + }, + { + "epoch": 0.26, + "grad_norm": 43.36779760537333, + "learning_rate": 4.614725560802639e-07, + "logits/generated": 2.383960485458374, + "logits/oppo_generated": -2.5410995483398438, + "logits/oppo_real": -2.6250815391540527, + "logits/real": 1.8639309406280518, + "logps/generated": -426.3900146484375, + "logps/oppo_gen": -85.07911682128906, + "logps/oppo_real": -302.9610595703125, + "logps/real": -330.28704833984375, + "loss": 0.4588, + "loss/gen": 0.2807266116142273, + "loss/real": 0.2807849943637848, + "rewards/accuracies": 1.0, + "rewards/generated": -341.3109130859375, + "rewards/margins": 313.98492431640625, + "rewards/real": -27.325986862182617, + "step": 125 + }, + { + "epoch": 0.26, + "grad_norm": 22.65675107204528, + "learning_rate": 4.60492740223006e-07, + "logits/generated": 3.7563798427581787, + "logits/oppo_generated": -2.6942880153656006, + "logits/oppo_real": -2.620532989501953, + "logits/real": 1.8744386434555054, + "logps/generated": -482.1658020019531, + "logps/oppo_gen": -80.65577697753906, + "logps/oppo_real": -334.33636474609375, + "logps/real": -340.36029052734375, + "loss": 0.4313, + "loss/gen": 0.1339287906885147, + "loss/real": 0.25005483627319336, + "rewards/accuracies": 1.0, + "rewards/generated": -401.510009765625, + "rewards/margins": 395.486083984375, + "rewards/real": -6.023906707763672, + "step": 126 + }, + { + "epoch": 0.27, + "grad_norm": 40.99230289315758, + "learning_rate": 4.595016887203488e-07, + "logits/generated": 4.8035993576049805, + "logits/oppo_generated": -2.6506311893463135, + "logits/oppo_real": -2.5980076789855957, + "logits/real": 3.132660388946533, + "logps/generated": -512.6973876953125, + "logps/oppo_gen": -71.25318908691406, + "logps/oppo_real": -271.4465026855469, + "logps/real": -341.7394714355469, + "loss": 0.4682, + "loss/gen": 0.09215886890888214, + "loss/real": 0.5298959016799927, + "rewards/accuracies": 0.9375, + "rewards/generated": -441.4441833496094, + "rewards/margins": 371.1512451171875, + "rewards/real": -70.29296875, + "step": 127 + }, + { + "epoch": 0.27, + "grad_norm": 56.79854708680386, + "learning_rate": 4.584994544724695e-07, + "logits/generated": 3.480712890625, + "logits/oppo_generated": -2.7567696571350098, + "logits/oppo_real": -2.71878981590271, + "logits/real": 2.234100103378296, + "logps/generated": -505.15631103515625, + "logps/oppo_gen": -76.11280059814453, + "logps/oppo_real": -274.9986267089844, + "logps/real": -316.529541015625, + "loss": 0.5428, + "loss/gen": 0.06698533147573471, + "loss/real": 0.37679505348205566, + "rewards/accuracies": 1.0, + "rewards/generated": -429.04351806640625, + "rewards/margins": 387.5125732421875, + "rewards/real": -41.530914306640625, + "step": 128 + }, + { + "epoch": 0.27, + "grad_norm": 34.5051442609468, + "learning_rate": 4.574860909764559e-07, + "logits/generated": 3.7409512996673584, + "logits/oppo_generated": -2.7801530361175537, + "logits/oppo_real": -2.662064790725708, + "logits/real": 1.398315191268921, + "logps/generated": -539.3264770507812, + "logps/oppo_gen": -81.54147338867188, + "logps/oppo_real": -353.9856262207031, + "logps/real": -355.10321044921875, + "loss": 0.3566, + "loss/gen": 0.20957569777965546, + "loss/real": 0.2095394879579544, + "rewards/accuracies": 1.0, + "rewards/generated": -457.7850341796875, + "rewards/margins": 456.66748046875, + "rewards/real": -1.117553472518921, + "step": 129 + }, + { + "epoch": 0.27, + "grad_norm": 58.581181985450755, + "learning_rate": 4.5646165232345103e-07, + "logits/generated": 3.001497983932495, + "logits/oppo_generated": -2.7938759326934814, + "logits/oppo_real": -2.797100067138672, + "logits/real": -0.1904727816581726, + "logps/generated": -476.5736083984375, + "logps/oppo_gen": -85.75858306884766, + "logps/oppo_real": -330.778076171875, + "logps/real": -355.42279052734375, + "loss": 0.4177, + "loss/gen": 0.21888455748558044, + "loss/real": 0.3061525225639343, + "rewards/accuracies": 1.0, + "rewards/generated": -390.8150329589844, + "rewards/margins": 366.17034912109375, + "rewards/real": -24.644712448120117, + "step": 130 + }, + { + "epoch": 0.27, + "grad_norm": 76.23830596803028, + "learning_rate": 4.554261931957657e-07, + "logits/generated": 1.2798744440078735, + "logits/oppo_generated": -2.702157497406006, + "logits/oppo_real": -2.785520553588867, + "logits/real": 0.2064148187637329, + "logps/generated": -550.7872314453125, + "logps/oppo_gen": -101.4595947265625, + "logps/oppo_real": -297.305419921875, + "logps/real": -281.96148681640625, + "loss": 0.4163, + "loss/gen": 0.17837375402450562, + "loss/real": 0.21107184886932373, + "rewards/accuracies": 1.0, + "rewards/generated": -449.3276062011719, + "rewards/margins": 464.67156982421875, + "rewards/real": 15.343944549560547, + "step": 131 + }, + { + "epoch": 0.28, + "grad_norm": 42.78716460666826, + "learning_rate": 4.5437976886395955e-07, + "logits/generated": 3.7917985916137695, + "logits/oppo_generated": -2.3708133697509766, + "logits/oppo_real": -2.418696641921997, + "logits/real": 2.052703380584717, + "logps/generated": -596.48828125, + "logps/oppo_gen": -74.7413558959961, + "logps/oppo_real": -314.3866882324219, + "logps/real": -309.42364501953125, + "loss": 0.3384, + "loss/gen": 0.10826118290424347, + "loss/real": 0.19940531253814697, + "rewards/accuracies": 1.0, + "rewards/generated": -521.7468872070312, + "rewards/margins": 526.7098999023438, + "rewards/real": 4.963039875030518, + "step": 132 + }, + { + "epoch": 0.28, + "grad_norm": 35.28753611515155, + "learning_rate": 4.5332243518389136e-07, + "logits/generated": 3.8041744232177734, + "logits/oppo_generated": -2.743939161300659, + "logits/oppo_real": -2.73405122756958, + "logits/real": 1.2489296197891235, + "logps/generated": -574.4937133789062, + "logps/oppo_gen": -79.94535827636719, + "logps/oppo_real": -272.021240234375, + "logps/real": -293.32586669921875, + "loss": 0.4572, + "loss/gen": 0.04564107209444046, + "loss/real": 0.34681591391563416, + "rewards/accuracies": 1.0, + "rewards/generated": -494.54833984375, + "rewards/margins": 473.2436828613281, + "rewards/real": -21.304649353027344, + "step": 133 + }, + { + "epoch": 0.28, + "grad_norm": 66.3057437946084, + "learning_rate": 4.5225424859373684e-07, + "logits/generated": 3.78678560256958, + "logits/oppo_generated": -2.7007479667663574, + "logits/oppo_real": -2.753429412841797, + "logits/real": 0.6452586650848389, + "logps/generated": -532.5565185546875, + "logps/oppo_gen": -77.23085021972656, + "logps/oppo_real": -266.35296630859375, + "logps/real": -287.3046875, + "loss": 0.4927, + "loss/gen": 0.0773511677980423, + "loss/real": 0.2800845503807068, + "rewards/accuracies": 1.0, + "rewards/generated": -455.32568359375, + "rewards/margins": 434.3739929199219, + "rewards/real": -20.95172691345215, + "step": 134 + }, + { + "epoch": 0.28, + "grad_norm": 83.11441133217362, + "learning_rate": 4.511752661109768e-07, + "logits/generated": 2.705482244491577, + "logits/oppo_generated": -2.691554546356201, + "logits/oppo_real": -2.8548970222473145, + "logits/real": 1.9097120761871338, + "logps/generated": -569.287841796875, + "logps/oppo_gen": -83.92848205566406, + "logps/oppo_real": -314.796875, + "logps/real": -374.4678039550781, + "loss": 0.517, + "loss/gen": 0.06381608545780182, + "loss/real": 0.5655554533004761, + "rewards/accuracies": 0.875, + "rewards/generated": -485.3593444824219, + "rewards/margins": 425.6883850097656, + "rewards/real": -59.67094421386719, + "step": 135 + }, + { + "epoch": 0.28, + "grad_norm": 33.9451048253141, + "learning_rate": 4.5008554532935316e-07, + "logits/generated": 1.3386224508285522, + "logits/oppo_generated": -2.7305233478546143, + "logits/oppo_real": -2.919029474258423, + "logits/real": 0.10530074685811996, + "logps/generated": -542.9541625976562, + "logps/oppo_gen": -73.71615600585938, + "logps/oppo_real": -288.34320068359375, + "logps/real": -307.84796142578125, + "loss": 0.4413, + "loss/gen": 0.0951322540640831, + "loss/real": 0.34142056107521057, + "rewards/accuracies": 0.9375, + "rewards/generated": -469.238037109375, + "rewards/margins": 449.7333068847656, + "rewards/real": -19.504728317260742, + "step": 136 + }, + { + "epoch": 0.29, + "grad_norm": 39.38732446009757, + "learning_rate": 4.4898514441579493e-07, + "logits/generated": 1.0103791952133179, + "logits/oppo_generated": -2.7485411167144775, + "logits/oppo_real": -2.8734302520751953, + "logits/real": -0.42922842502593994, + "logps/generated": -525.3568115234375, + "logps/oppo_gen": -87.78644561767578, + "logps/oppo_real": -241.0199432373047, + "logps/real": -247.32534790039062, + "loss": 0.444, + "loss/gen": 0.13491667807102203, + "loss/real": 0.21978439390659332, + "rewards/accuracies": 1.0, + "rewards/generated": -437.5703125, + "rewards/margins": 431.26495361328125, + "rewards/real": -6.305412292480469, + "step": 137 + }, + { + "epoch": 0.29, + "grad_norm": 47.51406812641783, + "learning_rate": 4.478741221073135e-07, + "logits/generated": 0.6537960767745972, + "logits/oppo_generated": -2.693493127822876, + "logits/oppo_real": -2.865375280380249, + "logits/real": -0.2775810658931732, + "logps/generated": -534.7683715820312, + "logps/oppo_gen": -103.05206298828125, + "logps/oppo_real": -489.7313232421875, + "logps/real": -491.6168212890625, + "loss": 0.4552, + "loss/gen": 0.13188748061656952, + "loss/real": 0.23172855377197266, + "rewards/accuracies": 0.9375, + "rewards/generated": -431.71630859375, + "rewards/margins": 429.8308410644531, + "rewards/real": -1.8854761123657227, + "step": 138 + }, + { + "epoch": 0.29, + "grad_norm": 53.06036049131922, + "learning_rate": 4.467525377078671e-07, + "logits/generated": 1.995945930480957, + "logits/oppo_generated": -2.8657331466674805, + "logits/oppo_real": -2.861227512359619, + "logits/real": 0.10061898082494736, + "logps/generated": -467.5784912109375, + "logps/oppo_gen": -75.67571258544922, + "logps/oppo_real": -287.5257568359375, + "logps/real": -309.69976806640625, + "loss": 0.4271, + "loss/gen": 0.1389007419347763, + "loss/real": 0.3501040041446686, + "rewards/accuracies": 0.875, + "rewards/generated": -391.90277099609375, + "rewards/margins": 369.728759765625, + "rewards/real": -22.17399787902832, + "step": 139 + }, + { + "epoch": 0.29, + "grad_norm": 32.51703279629001, + "learning_rate": 4.456204510851956e-07, + "logits/generated": 3.4422943592071533, + "logits/oppo_generated": -2.8228468894958496, + "logits/oppo_real": -2.7876362800598145, + "logits/real": -0.5376209020614624, + "logps/generated": -495.888671875, + "logps/oppo_gen": -74.50221252441406, + "logps/oppo_real": -289.463623046875, + "logps/real": -322.7392578125, + "loss": 0.5096, + "loss/gen": 0.12224815785884857, + "loss/real": 0.4228206276893616, + "rewards/accuracies": 0.875, + "rewards/generated": -421.38641357421875, + "rewards/margins": 388.11077880859375, + "rewards/real": -33.275638580322266, + "step": 140 + }, + { + "epoch": 0.29, + "grad_norm": 42.809336390868786, + "learning_rate": 4.444779226676246e-07, + "logits/generated": 3.224811315536499, + "logits/oppo_generated": -2.8969969749450684, + "logits/oppo_real": -2.8022689819335938, + "logits/real": -0.3930954337120056, + "logps/generated": -569.3974609375, + "logps/oppo_gen": -73.09552764892578, + "logps/oppo_real": -194.8427734375, + "logps/real": -206.7709197998047, + "loss": 0.4614, + "loss/gen": 0.05774524062871933, + "loss/real": 0.2606027126312256, + "rewards/accuracies": 1.0, + "rewards/generated": -496.3019714355469, + "rewards/margins": 484.373779296875, + "rewards/real": -11.928150177001953, + "step": 141 + }, + { + "epoch": 0.3, + "grad_norm": 43.46620005101665, + "learning_rate": 4.4332501344084005e-07, + "logits/generated": 4.730867385864258, + "logits/oppo_generated": -2.628166675567627, + "logits/oppo_real": -2.599769353866577, + "logits/real": -0.41064295172691345, + "logps/generated": -721.416748046875, + "logps/oppo_gen": -60.03528594970703, + "logps/oppo_real": -204.53244018554688, + "logps/real": -209.001220703125, + "loss": 0.4182, + "loss/gen": 0.04421373829245567, + "loss/real": 0.2350148856639862, + "rewards/accuracies": 1.0, + "rewards/generated": -661.3814086914062, + "rewards/margins": 656.91259765625, + "rewards/real": -4.468780994415283, + "step": 142 + }, + { + "epoch": 0.3, + "grad_norm": 95.168892838272, + "learning_rate": 4.4216178494463295e-07, + "logits/generated": 3.0664350986480713, + "logits/oppo_generated": -2.8121323585510254, + "logits/oppo_real": -2.735476493835449, + "logits/real": 0.5808520317077637, + "logps/generated": -730.3409423828125, + "logps/oppo_gen": -83.52450561523438, + "logps/oppo_real": -399.65948486328125, + "logps/real": -436.56512451171875, + "loss": 0.4257, + "loss/gen": 0.08173231780529022, + "loss/real": 0.3919621706008911, + "rewards/accuracies": 1.0, + "rewards/generated": -646.81640625, + "rewards/margins": 609.910888671875, + "rewards/real": -36.90563201904297, + "step": 143 + }, + { + "epoch": 0.3, + "grad_norm": 33.59023782512469, + "learning_rate": 4.4098829926961477e-07, + "logits/generated": 2.08347225189209, + "logits/oppo_generated": -2.6980109214782715, + "logits/oppo_real": -2.983388900756836, + "logits/real": 2.226280450820923, + "logps/generated": -538.6220703125, + "logps/oppo_gen": -78.72826385498047, + "logps/oppo_real": -206.14263916015625, + "logps/real": -280.3409423828125, + "loss": 0.442, + "loss/gen": 0.16410240530967712, + "loss/real": 0.6128249764442444, + "rewards/accuracies": 1.0, + "rewards/generated": -459.893798828125, + "rewards/margins": 385.6955261230469, + "rewards/real": -74.19828796386719, + "step": 144 + }, + { + "epoch": 0.3, + "grad_norm": 88.10951215852066, + "learning_rate": 4.398046190539024e-07, + "logits/generated": 0.7178199291229248, + "logits/oppo_generated": -2.756943702697754, + "logits/oppo_real": -2.801675796508789, + "logits/real": -1.1681644916534424, + "logps/generated": -528.5025634765625, + "logps/oppo_gen": -86.45504760742188, + "logps/oppo_real": -309.9644775390625, + "logps/real": -289.3895568847656, + "loss": 0.4441, + "loss/gen": 0.22567950189113617, + "loss/real": 0.1723930984735489, + "rewards/accuracies": 1.0, + "rewards/generated": -442.04754638671875, + "rewards/margins": 462.6224365234375, + "rewards/real": 20.574920654296875, + "step": 145 + }, + { + "epoch": 0.31, + "grad_norm": 96.93573197130407, + "learning_rate": 4.3861080747977566e-07, + "logits/generated": 2.6402652263641357, + "logits/oppo_generated": -2.6960177421569824, + "logits/oppo_real": -2.7647712230682373, + "logits/real": 0.7887452840805054, + "logps/generated": -672.70458984375, + "logps/oppo_gen": -86.10973358154297, + "logps/oppo_real": -239.423583984375, + "logps/real": -226.11953735351562, + "loss": 0.4131, + "loss/gen": 0.06325114518404007, + "loss/real": 0.2073189914226532, + "rewards/accuracies": 1.0, + "rewards/generated": -586.5948486328125, + "rewards/margins": 599.8988647460938, + "rewards/real": 13.304061889648438, + "step": 146 + }, + { + "epoch": 0.31, + "grad_norm": 45.219169778200126, + "learning_rate": 4.37406928270304e-07, + "logits/generated": 1.6589947938919067, + "logits/oppo_generated": -2.884913444519043, + "logits/oppo_real": -3.0370020866394043, + "logits/real": 1.7600839138031006, + "logps/generated": -586.7601318359375, + "logps/oppo_gen": -84.35366821289062, + "logps/oppo_real": -321.58514404296875, + "logps/real": -338.66668701171875, + "loss": 0.4066, + "loss/gen": 0.061856117099523544, + "loss/real": 0.3464030623435974, + "rewards/accuracies": 1.0, + "rewards/generated": -502.40643310546875, + "rewards/margins": 485.32489013671875, + "rewards/real": -17.08155059814453, + "step": 147 + }, + { + "epoch": 0.31, + "grad_norm": 63.70267659886337, + "learning_rate": 4.3619304568594546e-07, + "logits/generated": 4.678864479064941, + "logits/oppo_generated": -2.751469135284424, + "logits/oppo_real": -2.747343063354492, + "logits/real": 2.1347851753234863, + "logps/generated": -602.4761962890625, + "logps/oppo_gen": -61.375526428222656, + "logps/oppo_real": -252.73489379882812, + "logps/real": -278.0063781738281, + "loss": 0.4028, + "loss/gen": 0.09595850110054016, + "loss/real": 0.30543041229248047, + "rewards/accuracies": 1.0, + "rewards/generated": -541.1007080078125, + "rewards/margins": 515.8291625976562, + "rewards/real": -25.27145767211914, + "step": 148 + }, + { + "epoch": 0.31, + "grad_norm": 23.65093958724209, + "learning_rate": 4.349692245211165e-07, + "logits/generated": 4.383773326873779, + "logits/oppo_generated": -2.817481517791748, + "logits/oppo_real": -2.831568717956543, + "logits/real": 0.075783371925354, + "logps/generated": -662.4940185546875, + "logps/oppo_gen": -105.8621597290039, + "logps/oppo_real": -327.9042663574219, + "logps/real": -348.6964111328125, + "loss": 0.3471, + "loss/gen": 0.0598088875412941, + "loss/real": 0.3484806418418884, + "rewards/accuracies": 0.9375, + "rewards/generated": -556.6318359375, + "rewards/margins": 535.8397216796875, + "rewards/real": -20.792137145996094, + "step": 149 + }, + { + "epoch": 0.31, + "grad_norm": 44.175200783446506, + "learning_rate": 4.337355301007335e-07, + "logits/generated": 2.0324692726135254, + "logits/oppo_generated": -2.709242820739746, + "logits/oppo_real": -2.915376663208008, + "logits/real": 0.17188304662704468, + "logps/generated": -653.400390625, + "logps/oppo_gen": -87.3215103149414, + "logps/oppo_real": -375.57025146484375, + "logps/real": -374.0672607421875, + "loss": 0.4053, + "loss/gen": 0.05262891948223114, + "loss/real": 0.2269752323627472, + "rewards/accuracies": 1.0, + "rewards/generated": -566.0789184570312, + "rewards/margins": 567.5818481445312, + "rewards/real": 1.5029735565185547, + "step": 150 + }, + { + "epoch": 0.32, + "grad_norm": 43.954390034674724, + "learning_rate": 4.324920282767256e-07, + "logits/generated": 2.20145320892334, + "logits/oppo_generated": -2.964322328567505, + "logits/oppo_real": -2.982285499572754, + "logits/real": -1.8403794765472412, + "logps/generated": -593.6246948242188, + "logps/oppo_gen": -96.36965942382812, + "logps/oppo_real": -400.2677001953125, + "logps/real": -381.9501953125, + "loss": 0.3817, + "loss/gen": 0.05244412645697594, + "loss/real": 0.17407363653182983, + "rewards/accuracies": 1.0, + "rewards/generated": -497.2550048828125, + "rewards/margins": 515.5724487304688, + "rewards/real": 18.3174991607666, + "step": 151 + }, + { + "epoch": 0.32, + "grad_norm": 24.56001373986472, + "learning_rate": 4.312387854245201e-07, + "logits/generated": 2.274653911590576, + "logits/oppo_generated": -2.6832199096679688, + "logits/oppo_real": -2.6996188163757324, + "logits/real": 0.23763501644134521, + "logps/generated": -595.6156005859375, + "logps/oppo_gen": -99.48007202148438, + "logps/oppo_real": -341.5190734863281, + "logps/real": -335.8973693847656, + "loss": 0.4202, + "loss/gen": 0.08892205357551575, + "loss/real": 0.24602922797203064, + "rewards/accuracies": 1.0, + "rewards/generated": -496.13555908203125, + "rewards/margins": 501.75726318359375, + "rewards/real": 5.621710777282715, + "step": 152 + }, + { + "epoch": 0.32, + "grad_norm": 88.40290290588595, + "learning_rate": 4.2997586843949896e-07, + "logits/generated": 2.223160982131958, + "logits/oppo_generated": -2.5872344970703125, + "logits/oppo_real": -2.4579038619995117, + "logits/real": -1.5002179145812988, + "logps/generated": -527.0772705078125, + "logps/oppo_gen": -70.17259216308594, + "logps/oppo_real": -416.83404541015625, + "logps/real": -401.7785949707031, + "loss": 0.4494, + "loss/gen": 0.11349719762802124, + "loss/real": 0.18198516964912415, + "rewards/accuracies": 1.0, + "rewards/generated": -456.90472412109375, + "rewards/margins": 471.960205078125, + "rewards/real": 15.055469512939453, + "step": 153 + }, + { + "epoch": 0.32, + "grad_norm": 88.65677597217964, + "learning_rate": 4.287033447334286e-07, + "logits/generated": 2.3303475379943848, + "logits/oppo_generated": -2.7479500770568848, + "logits/oppo_real": -2.8972339630126953, + "logits/real": -0.40632662177085876, + "logps/generated": -480.84222412109375, + "logps/oppo_gen": -80.25750732421875, + "logps/oppo_real": -294.4725646972656, + "logps/real": -291.6681823730469, + "loss": 0.5496, + "loss/gen": 0.2343224287033081, + "loss/real": 0.23040422797203064, + "rewards/accuracies": 1.0, + "rewards/generated": -400.584716796875, + "rewards/margins": 403.38909912109375, + "rewards/real": 2.804377555847168, + "step": 154 + }, + { + "epoch": 0.32, + "grad_norm": 32.896789060161325, + "learning_rate": 4.2742128223086115e-07, + "logits/generated": 0.9537273645401001, + "logits/oppo_generated": -2.8202219009399414, + "logits/oppo_real": -3.0027949810028076, + "logits/real": -0.3538452982902527, + "logps/generated": -534.1832275390625, + "logps/oppo_gen": -73.1172103881836, + "logps/oppo_real": -376.35302734375, + "logps/real": -374.4771728515625, + "loss": 0.3783, + "loss/gen": 0.09071531891822815, + "loss/real": 0.2262829840183258, + "rewards/accuracies": 1.0, + "rewards/generated": -461.0660400390625, + "rewards/margins": 462.94183349609375, + "rewards/real": 1.8758478164672852, + "step": 155 + }, + { + "epoch": 0.33, + "grad_norm": 31.70462085246309, + "learning_rate": 4.261297493655092e-07, + "logits/generated": 3.504033088684082, + "logits/oppo_generated": -2.5366318225860596, + "logits/oppo_real": -2.6091504096984863, + "logits/real": 0.27710264921188354, + "logps/generated": -533.432373046875, + "logps/oppo_gen": -70.62061309814453, + "logps/oppo_real": -308.5993957519531, + "logps/real": -312.66424560546875, + "loss": 0.395, + "loss/gen": 0.09391500055789948, + "loss/real": 0.3338760435581207, + "rewards/accuracies": 0.9375, + "rewards/generated": -462.81170654296875, + "rewards/margins": 458.74688720703125, + "rewards/real": -4.0648345947265625, + "step": 156 + }, + { + "epoch": 0.33, + "grad_norm": 55.69409770820648, + "learning_rate": 4.2482881507659244e-07, + "logits/generated": 5.257065773010254, + "logits/oppo_generated": -2.839346408843994, + "logits/oppo_real": -2.589020013809204, + "logits/real": -0.5401603579521179, + "logps/generated": -675.083984375, + "logps/oppo_gen": -81.69267272949219, + "logps/oppo_real": -345.9958190917969, + "logps/real": -360.93621826171875, + "loss": 0.4078, + "loss/gen": 0.04161800444126129, + "loss/real": 0.3369942605495453, + "rewards/accuracies": 1.0, + "rewards/generated": -593.391357421875, + "rewards/margins": 578.450927734375, + "rewards/real": -14.940374374389648, + "step": 157 + }, + { + "epoch": 0.33, + "grad_norm": 69.88690640957003, + "learning_rate": 4.235185488051585e-07, + "logits/generated": 5.414549827575684, + "logits/oppo_generated": -2.6783924102783203, + "logits/oppo_real": -2.712614059448242, + "logits/real": 1.8532522916793823, + "logps/generated": -568.1709594726562, + "logps/oppo_gen": -56.096778869628906, + "logps/oppo_real": -230.0592498779297, + "logps/real": -244.01300048828125, + "loss": 0.3708, + "loss/gen": 0.04885585233569145, + "loss/real": 0.2971190810203552, + "rewards/accuracies": 1.0, + "rewards/generated": -512.0741577148438, + "rewards/margins": 498.12042236328125, + "rewards/real": -13.953733444213867, + "step": 158 + }, + { + "epoch": 0.33, + "grad_norm": 33.58151666793431, + "learning_rate": 4.2219902049037554e-07, + "logits/generated": 4.000687122344971, + "logits/oppo_generated": -2.7408018112182617, + "logits/oppo_real": -2.6598281860351562, + "logits/real": 0.4296528697013855, + "logps/generated": -535.0169677734375, + "logps/oppo_gen": -72.39141845703125, + "logps/oppo_real": -357.7438659667969, + "logps/real": -398.12890625, + "loss": 0.3407, + "loss/gen": 0.07793214172124863, + "loss/real": 0.4461689591407776, + "rewards/accuracies": 1.0, + "rewards/generated": -462.62554931640625, + "rewards/margins": 422.2405090332031, + "rewards/real": -40.385040283203125, + "step": 159 + }, + { + "epoch": 0.33, + "grad_norm": 49.08348725083748, + "learning_rate": 4.2087030056579986e-07, + "logits/generated": 1.8264459371566772, + "logits/oppo_generated": -2.8000125885009766, + "logits/oppo_real": -2.9204776287078857, + "logits/real": 0.3307687044143677, + "logps/generated": -575.8387451171875, + "logps/oppo_gen": -87.82046508789062, + "logps/oppo_real": -341.7147216796875, + "logps/real": -382.95611572265625, + "loss": 0.4618, + "loss/gen": 0.062293656170368195, + "loss/real": 0.4385297894477844, + "rewards/accuracies": 1.0, + "rewards/generated": -488.0182189941406, + "rewards/margins": 446.7768249511719, + "rewards/real": -41.24138641357422, + "step": 160 + }, + { + "epoch": 0.34, + "grad_norm": 41.03429485798662, + "learning_rate": 4.1953245995561577e-07, + "logits/generated": 4.770796298980713, + "logits/oppo_generated": -2.6554837226867676, + "logits/oppo_real": -2.6420817375183105, + "logits/real": -0.8062165975570679, + "logps/generated": -464.02740478515625, + "logps/oppo_gen": -63.17603302001953, + "logps/oppo_real": -296.563720703125, + "logps/real": -275.2172546386719, + "loss": 0.3776, + "loss/gen": 0.20890629291534424, + "loss/real": 0.20531198382377625, + "rewards/accuracies": 1.0, + "rewards/generated": -400.85137939453125, + "rewards/margins": 422.1978759765625, + "rewards/real": 21.34646224975586, + "step": 161 + }, + { + "epoch": 0.34, + "grad_norm": 54.28695053184398, + "learning_rate": 4.1818557007085e-07, + "logits/generated": 1.0603010654449463, + "logits/oppo_generated": -2.7772350311279297, + "logits/oppo_real": -2.840733528137207, + "logits/real": -0.4071798622608185, + "logps/generated": -500.5398254394531, + "logps/oppo_gen": -87.80577087402344, + "logps/oppo_real": -293.36444091796875, + "logps/real": -301.470703125, + "loss": 0.4106, + "loss/gen": 0.1799190789461136, + "loss/real": 0.3776960074901581, + "rewards/accuracies": 1.0, + "rewards/generated": -412.73406982421875, + "rewards/margins": 404.6278076171875, + "rewards/real": -8.10627555847168, + "step": 162 + }, + { + "epoch": 0.34, + "grad_norm": 28.239338981521527, + "learning_rate": 4.1682970280555987e-07, + "logits/generated": 0.7941306233406067, + "logits/oppo_generated": -2.940112590789795, + "logits/oppo_real": -3.094730854034424, + "logits/real": -0.9150586128234863, + "logps/generated": -491.39971923828125, + "logps/oppo_gen": -76.87165832519531, + "logps/oppo_real": -265.2705383300781, + "logps/real": -275.28662109375, + "loss": 0.4083, + "loss/gen": 0.19433584809303284, + "loss/real": 0.27786490321159363, + "rewards/accuracies": 1.0, + "rewards/generated": -414.528076171875, + "rewards/margins": 404.51202392578125, + "rewards/real": -10.016090393066406, + "step": 163 + }, + { + "epoch": 0.34, + "grad_norm": 39.62413279068783, + "learning_rate": 4.154649305329958e-07, + "logits/generated": 1.9323348999023438, + "logits/oppo_generated": -2.86384916305542, + "logits/oppo_real": -2.868351697921753, + "logits/real": -0.1303810477256775, + "logps/generated": -557.84228515625, + "logps/oppo_gen": -69.69680786132812, + "logps/oppo_real": -212.30072021484375, + "logps/real": -245.2010498046875, + "loss": 0.501, + "loss/gen": 0.10067278146743774, + "loss/real": 0.3780396580696106, + "rewards/accuracies": 1.0, + "rewards/generated": -488.1455078125, + "rewards/margins": 455.24517822265625, + "rewards/real": -32.900325775146484, + "step": 164 + }, + { + "epoch": 0.35, + "grad_norm": 32.57133659789597, + "learning_rate": 4.140913261017382e-07, + "logits/generated": 4.161721229553223, + "logits/oppo_generated": -2.869417190551758, + "logits/oppo_real": -2.839968204498291, + "logits/real": -0.5776036977767944, + "logps/generated": -576.4458618164062, + "logps/oppo_gen": -88.48406982421875, + "logps/oppo_real": -363.1581726074219, + "logps/real": -361.1283874511719, + "loss": 0.3818, + "loss/gen": 0.06913182139396667, + "loss/real": 0.25034162402153015, + "rewards/accuracies": 1.0, + "rewards/generated": -487.9617919921875, + "rewards/margins": 489.9915771484375, + "rewards/real": 2.029754161834717, + "step": 165 + }, + { + "epoch": 0.35, + "grad_norm": 58.546320387018135, + "learning_rate": 4.127089628318089e-07, + "logits/generated": 3.4975500106811523, + "logits/oppo_generated": -2.6603808403015137, + "logits/oppo_real": -2.783158302307129, + "logits/real": -0.20328007638454437, + "logps/generated": -624.7900390625, + "logps/oppo_gen": -71.49801635742188, + "logps/oppo_real": -266.64727783203125, + "logps/real": -276.4420166015625, + "loss": 0.4823, + "loss/gen": 0.14242783188819885, + "loss/real": 0.27045345306396484, + "rewards/accuracies": 1.0, + "rewards/generated": -553.2919921875, + "rewards/margins": 543.497314453125, + "rewards/real": -9.794729232788086, + "step": 166 + }, + { + "epoch": 0.35, + "grad_norm": 32.59188857939771, + "learning_rate": 4.113179145107575e-07, + "logits/generated": 3.191453456878662, + "logits/oppo_generated": -2.6542935371398926, + "logits/oppo_real": -2.6339142322540283, + "logits/real": -0.12376086413860321, + "logps/generated": -537.1490478515625, + "logps/oppo_gen": -78.53817749023438, + "logps/oppo_real": -259.07733154296875, + "logps/real": -275.25604248046875, + "loss": 0.374, + "loss/gen": 0.11224167793989182, + "loss/real": 0.27741315960884094, + "rewards/accuracies": 1.0, + "rewards/generated": -458.61083984375, + "rewards/margins": 442.43212890625, + "rewards/real": -16.178741455078125, + "step": 167 + }, + { + "epoch": 0.35, + "grad_norm": 35.26097942374651, + "learning_rate": 4.099182553897228e-07, + "logits/generated": 3.541701555252075, + "logits/oppo_generated": -2.703266143798828, + "logits/oppo_real": -2.672393798828125, + "logits/real": -1.2182093858718872, + "logps/generated": -678.5563354492188, + "logps/oppo_gen": -94.5416259765625, + "logps/oppo_real": -463.65789794921875, + "logps/real": -437.16644287109375, + "loss": 0.3667, + "loss/gen": 0.08236212283372879, + "loss/real": 0.1626996248960495, + "rewards/accuracies": 1.0, + "rewards/generated": -584.0147094726562, + "rewards/margins": 610.506103515625, + "rewards/real": 26.491458892822266, + "step": 168 + }, + { + "epoch": 0.35, + "grad_norm": 29.281886791034637, + "learning_rate": 4.0851006017946945e-07, + "logits/generated": 5.741336822509766, + "logits/oppo_generated": -2.6494524478912354, + "logits/oppo_real": -2.6128625869750977, + "logits/real": -0.31815677881240845, + "logps/generated": -608.6373291015625, + "logps/oppo_gen": -66.14505004882812, + "logps/oppo_real": -303.870361328125, + "logps/real": -288.19305419921875, + "loss": 0.3213, + "loss/gen": 0.06941390037536621, + "loss/real": 0.1854477822780609, + "rewards/accuracies": 1.0, + "rewards/generated": -542.4923095703125, + "rewards/margins": 558.1696166992188, + "rewards/real": 15.677297592163086, + "step": 169 + }, + { + "epoch": 0.36, + "grad_norm": 40.914190047777126, + "learning_rate": 4.070934040463998e-07, + "logits/generated": 2.282456398010254, + "logits/oppo_generated": -2.586618423461914, + "logits/oppo_real": -2.638277292251587, + "logits/real": 1.154468059539795, + "logps/generated": -638.625732421875, + "logps/oppo_gen": -84.69650268554688, + "logps/oppo_real": -285.56890869140625, + "logps/real": -310.78338623046875, + "loss": 0.3753, + "loss/gen": 0.15161314606666565, + "loss/real": 0.3878932595252991, + "rewards/accuracies": 0.9375, + "rewards/generated": -553.9293212890625, + "rewards/margins": 528.71484375, + "rewards/real": -25.21445083618164, + "step": 170 + }, + { + "epoch": 0.36, + "grad_norm": 46.814385288305076, + "learning_rate": 4.056683626085422e-07, + "logits/generated": 3.1288087368011475, + "logits/oppo_generated": -2.861464500427246, + "logits/oppo_real": -3.037282943725586, + "logits/real": 0.27654290199279785, + "logps/generated": -743.48583984375, + "logps/oppo_gen": -72.53483581542969, + "logps/oppo_real": -316.06658935546875, + "logps/real": -325.1678771972656, + "loss": 0.4359, + "loss/gen": 0.02361534722149372, + "loss/real": 0.35334840416908264, + "rewards/accuracies": 0.9375, + "rewards/generated": -670.9510498046875, + "rewards/margins": 661.8497314453125, + "rewards/real": -9.101311683654785, + "step": 171 + }, + { + "epoch": 0.36, + "grad_norm": 33.77107812571931, + "learning_rate": 4.042350119315141e-07, + "logits/generated": 3.522606372833252, + "logits/oppo_generated": -2.546668529510498, + "logits/oppo_real": -2.747616767883301, + "logits/real": 1.465965986251831, + "logps/generated": -586.4928588867188, + "logps/oppo_gen": -50.940589904785156, + "logps/oppo_real": -195.0120849609375, + "logps/real": -198.96145629882812, + "loss": 0.4098, + "loss/gen": 0.13806982338428497, + "loss/real": 0.2442954182624817, + "rewards/accuracies": 0.9375, + "rewards/generated": -535.55224609375, + "rewards/margins": 531.6029052734375, + "rewards/real": -3.949376106262207, + "step": 172 + }, + { + "epoch": 0.36, + "grad_norm": 47.288798554731756, + "learning_rate": 4.027934285244623e-07, + "logits/generated": 2.3074827194213867, + "logits/oppo_generated": -2.7699496746063232, + "logits/oppo_real": -2.825406074523926, + "logits/real": -0.9217118620872498, + "logps/generated": -621.0782470703125, + "logps/oppo_gen": -115.70794677734375, + "logps/oppo_real": -436.4163818359375, + "logps/real": -410.5096435546875, + "loss": 0.3803, + "loss/gen": 0.22207143902778625, + "loss/real": 0.1597864329814911, + "rewards/accuracies": 1.0, + "rewards/generated": -505.3702392578125, + "rewards/margins": 531.277099609375, + "rewards/real": 25.90674591064453, + "step": 173 + }, + { + "epoch": 0.36, + "grad_norm": 38.79185460117159, + "learning_rate": 4.0134368933597864e-07, + "logits/generated": 2.8078434467315674, + "logits/oppo_generated": -2.8050785064697266, + "logits/oppo_real": -2.6773767471313477, + "logits/real": -1.348842740058899, + "logps/generated": -618.10205078125, + "logps/oppo_gen": -84.29270935058594, + "logps/oppo_real": -391.57928466796875, + "logps/real": -382.04180908203125, + "loss": 0.3099, + "loss/gen": 0.16892102360725403, + "loss/real": 0.18786463141441345, + "rewards/accuracies": 1.0, + "rewards/generated": -533.8093872070312, + "rewards/margins": 543.3468017578125, + "rewards/real": 9.537463188171387, + "step": 174 + }, + { + "epoch": 0.37, + "grad_norm": 53.66757624706351, + "learning_rate": 3.9988587174999306e-07, + "logits/generated": 2.588200569152832, + "logits/oppo_generated": -3.0364410877227783, + "logits/oppo_real": -2.832014560699463, + "logits/real": -2.086704730987549, + "logps/generated": -659.8938598632812, + "logps/oppo_gen": -103.90766906738281, + "logps/oppo_real": -399.5452575683594, + "logps/real": -396.6692810058594, + "loss": 0.3727, + "loss/gen": 0.04421517252922058, + "loss/real": 0.24986904859542847, + "rewards/accuracies": 1.0, + "rewards/generated": -555.9861450195312, + "rewards/margins": 558.8621826171875, + "rewards/real": 2.875990867614746, + "step": 175 + }, + { + "epoch": 0.37, + "grad_norm": 21.4714961398682, + "learning_rate": 3.9842005358164267e-07, + "logits/generated": 3.0104498863220215, + "logits/oppo_generated": -2.735055446624756, + "logits/oppo_real": -2.919309139251709, + "logits/real": -0.7276687026023865, + "logps/generated": -599.495849609375, + "logps/oppo_gen": -73.57182312011719, + "logps/oppo_real": -302.60333251953125, + "logps/real": -301.95623779296875, + "loss": 0.3197, + "loss/gen": 0.041153088212013245, + "loss/real": 0.21765488386154175, + "rewards/accuracies": 1.0, + "rewards/generated": -525.924072265625, + "rewards/margins": 526.571044921875, + "rewards/real": 0.6470375061035156, + "step": 176 + }, + { + "epoch": 0.37, + "grad_norm": 54.03969669011268, + "learning_rate": 3.9694631307311825e-07, + "logits/generated": 4.6008453369140625, + "logits/oppo_generated": -2.830564022064209, + "logits/oppo_real": -2.691429376602173, + "logits/real": 0.8001154065132141, + "logps/generated": -592.5567626953125, + "logps/oppo_gen": -68.98664093017578, + "logps/oppo_real": -178.04420471191406, + "logps/real": -268.3450927734375, + "loss": 0.4679, + "loss/gen": 0.1762559711933136, + "loss/real": 0.8209776878356934, + "rewards/accuracies": 1.0, + "rewards/generated": -523.5701293945312, + "rewards/margins": 433.269287109375, + "rewards/real": -90.3008804321289, + "step": 177 + }, + { + "epoch": 0.37, + "grad_norm": 36.03140584311484, + "learning_rate": 3.954647288894882e-07, + "logits/generated": 1.2684483528137207, + "logits/oppo_generated": -2.752481698989868, + "logits/oppo_real": -2.8730828762054443, + "logits/real": -0.48670902848243713, + "logps/generated": -603.163330078125, + "logps/oppo_gen": -70.56501770019531, + "logps/oppo_real": -325.8918762207031, + "logps/real": -301.71539306640625, + "loss": 0.3475, + "loss/gen": 0.11980742961168289, + "loss/real": 0.16193076968193054, + "rewards/accuracies": 1.0, + "rewards/generated": -532.5982666015625, + "rewards/margins": 556.7747192382812, + "rewards/real": 24.176467895507812, + "step": 178 + }, + { + "epoch": 0.37, + "grad_norm": 24.202740424236442, + "learning_rate": 3.9397538011449896e-07, + "logits/generated": 4.160590171813965, + "logits/oppo_generated": -2.6507742404937744, + "logits/oppo_real": -2.5858640670776367, + "logits/real": 0.4383518695831299, + "logps/generated": -581.2891845703125, + "logps/oppo_gen": -63.321895599365234, + "logps/oppo_real": -241.67031860351562, + "logps/real": -224.41864013671875, + "loss": 0.3749, + "loss/gen": 0.17947952449321747, + "loss/real": 0.175423726439476, + "rewards/accuracies": 1.0, + "rewards/generated": -517.96728515625, + "rewards/margins": 535.218994140625, + "rewards/real": 17.251686096191406, + "step": 179 + }, + { + "epoch": 0.38, + "grad_norm": 40.32214788861777, + "learning_rate": 3.9247834624635404e-07, + "logits/generated": 5.015523433685303, + "logits/oppo_generated": -2.7281503677368164, + "logits/oppo_real": -2.559065818786621, + "logits/real": 0.1538265496492386, + "logps/generated": -683.5272827148438, + "logps/oppo_gen": -62.72259521484375, + "logps/oppo_real": -167.6991729736328, + "logps/real": -162.67086791992188, + "loss": 0.3274, + "loss/gen": 0.0464489683508873, + "loss/real": 0.20436066389083862, + "rewards/accuracies": 1.0, + "rewards/generated": -620.8046875, + "rewards/margins": 625.8330078125, + "rewards/real": 5.028313636779785, + "step": 180 + }, + { + "epoch": 0.38, + "grad_norm": 39.37261122466273, + "learning_rate": 3.9097370719347065e-07, + "logits/generated": 2.0698001384735107, + "logits/oppo_generated": -2.910065174102783, + "logits/oppo_real": -2.79630970954895, + "logits/real": -0.8888975381851196, + "logps/generated": -638.67626953125, + "logps/oppo_gen": -88.21836853027344, + "logps/oppo_real": -352.46441650390625, + "logps/real": -384.6054382324219, + "loss": 0.3955, + "loss/gen": 0.05475688725709915, + "loss/real": 0.3640737533569336, + "rewards/accuracies": 0.9375, + "rewards/generated": -550.4578857421875, + "rewards/margins": 518.31689453125, + "rewards/real": -32.14099884033203, + "step": 181 + }, + { + "epoch": 0.38, + "grad_norm": 46.871757700366594, + "learning_rate": 3.894615432702143e-07, + "logits/generated": 3.8333396911621094, + "logits/oppo_generated": -2.741304397583008, + "logits/oppo_real": -2.577056884765625, + "logits/real": -1.3356674909591675, + "logps/generated": -626.2735595703125, + "logps/oppo_gen": -113.4937744140625, + "logps/oppo_real": -357.3503723144531, + "logps/real": -327.42108154296875, + "loss": 0.3519, + "loss/gen": 0.07857067883014679, + "loss/real": 0.16131818294525146, + "rewards/accuracies": 1.0, + "rewards/generated": -512.7797241210938, + "rewards/margins": 542.7090454101562, + "rewards/real": 29.929283142089844, + "step": 182 + }, + { + "epoch": 0.38, + "grad_norm": 23.50506796873482, + "learning_rate": 3.879419351926115e-07, + "logits/generated": 1.8759452104568481, + "logits/oppo_generated": -2.8626461029052734, + "logits/oppo_real": -2.836373805999756, + "logits/real": -1.6114048957824707, + "logps/generated": -601.630126953125, + "logps/oppo_gen": -81.16427612304688, + "logps/oppo_real": -371.1536865234375, + "logps/real": -348.64056396484375, + "loss": 0.3708, + "loss/gen": 0.0639682188630104, + "loss/real": 0.16622665524482727, + "rewards/accuracies": 1.0, + "rewards/generated": -520.4658203125, + "rewards/margins": 542.9788818359375, + "rewards/real": 22.513111114501953, + "step": 183 + }, + { + "epoch": 0.38, + "grad_norm": 36.387666260140605, + "learning_rate": 3.864149640740416e-07, + "logits/generated": 0.818359375, + "logits/oppo_generated": -2.6479756832122803, + "logits/oppo_real": -2.969878673553467, + "logits/real": -0.3535976707935333, + "logps/generated": -607.1737060546875, + "logps/oppo_gen": -84.10977172851562, + "logps/oppo_real": -309.2869873046875, + "logps/real": -311.79852294921875, + "loss": 0.3416, + "loss/gen": 0.15428604185581207, + "loss/real": 0.2501806616783142, + "rewards/accuracies": 1.0, + "rewards/generated": -523.06396484375, + "rewards/margins": 520.552490234375, + "rewards/real": -2.511505126953125, + "step": 184 + }, + { + "epoch": 0.39, + "grad_norm": 37.67410429944719, + "learning_rate": 3.848807114209074e-07, + "logits/generated": 3.9288644790649414, + "logits/oppo_generated": -2.626985549926758, + "logits/oppo_real": -2.517353057861328, + "logits/real": -0.48309874534606934, + "logps/generated": -566.0116577148438, + "logps/oppo_gen": -63.896446228027344, + "logps/oppo_real": -215.14120483398438, + "logps/real": -218.39749145507812, + "loss": 0.3856, + "loss/gen": 0.05431937426328659, + "loss/real": 0.25885841250419617, + "rewards/accuracies": 1.0, + "rewards/generated": -502.115234375, + "rewards/margins": 498.85894775390625, + "rewards/real": -3.2562923431396484, + "step": 185 + }, + { + "epoch": 0.39, + "grad_norm": 29.94640904266789, + "learning_rate": 3.833392591282838e-07, + "logits/generated": 2.5921592712402344, + "logits/oppo_generated": -2.8032891750335693, + "logits/oppo_real": -2.858144760131836, + "logits/real": 0.4319887161254883, + "logps/generated": -600.787353515625, + "logps/oppo_gen": -70.31141662597656, + "logps/oppo_real": -278.02667236328125, + "logps/real": -309.3092041015625, + "loss": 0.3872, + "loss/gen": 0.16086134314537048, + "loss/real": 0.4028547406196594, + "rewards/accuracies": 0.9375, + "rewards/generated": -530.4760131835938, + "rewards/margins": 499.19342041015625, + "rewards/real": -31.282567977905273, + "step": 186 + }, + { + "epoch": 0.39, + "grad_norm": 32.256352708250546, + "learning_rate": 3.8179068947554705e-07, + "logits/generated": 3.833861827850342, + "logits/oppo_generated": -2.8886849880218506, + "logits/oppo_real": -2.736198902130127, + "logits/real": -0.7970997095108032, + "logps/generated": -652.3579711914062, + "logps/oppo_gen": -80.37522888183594, + "logps/oppo_real": -261.8301086425781, + "logps/real": -263.4853515625, + "loss": 0.4055, + "loss/gen": 0.05830386281013489, + "loss/real": 0.27234482765197754, + "rewards/accuracies": 1.0, + "rewards/generated": -571.9827880859375, + "rewards/margins": 570.3275756835938, + "rewards/real": -1.6552231311798096, + "step": 187 + }, + { + "epoch": 0.39, + "grad_norm": 49.28620130089024, + "learning_rate": 3.8023508512198257e-07, + "logits/generated": 4.238766193389893, + "logits/oppo_generated": -2.9484691619873047, + "logits/oppo_real": -2.766463041305542, + "logits/real": -1.860574722290039, + "logps/generated": -593.7321166992188, + "logps/oppo_gen": -76.98384094238281, + "logps/oppo_real": -369.90545654296875, + "logps/real": -355.2281494140625, + "loss": 0.3579, + "loss/gen": 0.053326621651649475, + "loss/real": 0.19532784819602966, + "rewards/accuracies": 1.0, + "rewards/generated": -516.748291015625, + "rewards/margins": 531.4256591796875, + "rewards/real": 14.677343368530273, + "step": 188 + }, + { + "epoch": 0.4, + "grad_norm": 25.22844962359273, + "learning_rate": 3.786725291023728e-07, + "logits/generated": 1.9840850830078125, + "logits/oppo_generated": -2.7869467735290527, + "logits/oppo_real": -2.9065823554992676, + "logits/real": 0.32308337092399597, + "logps/generated": -601.5640869140625, + "logps/oppo_gen": -69.37512969970703, + "logps/oppo_real": -174.45965576171875, + "logps/real": -216.3216094970703, + "loss": 0.373, + "loss/gen": 0.06400664150714874, + "loss/real": 0.4876205623149872, + "rewards/accuracies": 1.0, + "rewards/generated": -532.18896484375, + "rewards/margins": 490.3270263671875, + "rewards/real": -41.861942291259766, + "step": 189 + }, + { + "epoch": 0.4, + "grad_norm": 36.253108029476245, + "learning_rate": 3.7710310482256523e-07, + "logits/generated": 4.182609558105469, + "logits/oppo_generated": -2.7686333656311035, + "logits/oppo_real": -2.657388687133789, + "logits/real": -0.6948004961013794, + "logps/generated": -576.32470703125, + "logps/oppo_gen": -78.72369384765625, + "logps/oppo_real": -240.1993408203125, + "logps/real": -226.7103729248047, + "loss": 0.3604, + "loss/gen": 0.044823385775089264, + "loss/real": 0.18352998793125153, + "rewards/accuracies": 1.0, + "rewards/generated": -497.60101318359375, + "rewards/margins": 511.0899658203125, + "rewards/real": 13.4889554977417, + "step": 190 + }, + { + "epoch": 0.4, + "grad_norm": 26.63954589090132, + "learning_rate": 3.7552689605501986e-07, + "logits/generated": 3.2763257026672363, + "logits/oppo_generated": -2.657681941986084, + "logits/oppo_real": -2.8181610107421875, + "logits/real": 0.7460372447967529, + "logps/generated": -595.9085693359375, + "logps/oppo_gen": -73.89129638671875, + "logps/oppo_real": -256.21337890625, + "logps/real": -287.32672119140625, + "loss": 0.3531, + "loss/gen": 0.07981202751398087, + "loss/real": 0.4028546214103699, + "rewards/accuracies": 1.0, + "rewards/generated": -522.017333984375, + "rewards/margins": 490.9039306640625, + "rewards/real": -31.113351821899414, + "step": 191 + }, + { + "epoch": 0.4, + "grad_norm": 31.893125399588957, + "learning_rate": 3.7394398693433794e-07, + "logits/generated": 3.7813591957092285, + "logits/oppo_generated": -2.6394004821777344, + "logits/oppo_real": -2.7031853199005127, + "logits/real": -1.6782422065734863, + "logps/generated": -640.29931640625, + "logps/oppo_gen": -71.51564025878906, + "logps/oppo_real": -315.9356994628906, + "logps/real": -289.91998291015625, + "loss": 0.3326, + "loss/gen": 0.09133920818567276, + "loss/real": 0.16474120318889618, + "rewards/accuracies": 1.0, + "rewards/generated": -568.78369140625, + "rewards/margins": 594.7993774414062, + "rewards/real": 26.015716552734375, + "step": 192 + }, + { + "epoch": 0.4, + "grad_norm": 29.62806962604255, + "learning_rate": 3.7235446195277136e-07, + "logits/generated": 2.645258665084839, + "logits/oppo_generated": -2.579573154449463, + "logits/oppo_real": -2.747130870819092, + "logits/real": 0.6542664766311646, + "logps/generated": -572.8811645507812, + "logps/oppo_gen": -65.53421020507812, + "logps/oppo_real": -227.9467010498047, + "logps/real": -214.3878936767578, + "loss": 0.2953, + "loss/gen": 0.07026851177215576, + "loss/real": 0.2094188630580902, + "rewards/accuracies": 1.0, + "rewards/generated": -507.346923828125, + "rewards/margins": 520.90576171875, + "rewards/real": 13.558823585510254, + "step": 193 + }, + { + "epoch": 0.41, + "grad_norm": 34.85229715797081, + "learning_rate": 3.7075840595571194e-07, + "logits/generated": 3.5314037799835205, + "logits/oppo_generated": -2.5842761993408203, + "logits/oppo_real": -2.4687318801879883, + "logits/real": 0.21015453338623047, + "logps/generated": -634.6973876953125, + "logps/oppo_gen": -81.10606384277344, + "logps/oppo_real": -293.9441833496094, + "logps/real": -280.90740966796875, + "loss": 0.3575, + "loss/gen": 0.03518790006637573, + "loss/real": 0.19670113921165466, + "rewards/accuracies": 1.0, + "rewards/generated": -553.59130859375, + "rewards/margins": 566.628173828125, + "rewards/real": 13.03675651550293, + "step": 194 + }, + { + "epoch": 0.41, + "grad_norm": 23.25440855171001, + "learning_rate": 3.691559041371631e-07, + "logits/generated": 4.113855838775635, + "logits/oppo_generated": -2.7571568489074707, + "logits/oppo_real": -2.7542152404785156, + "logits/real": -0.6659407615661621, + "logps/generated": -763.6599731445312, + "logps/oppo_gen": -82.74861145019531, + "logps/oppo_real": -304.05267333984375, + "logps/real": -311.4844970703125, + "loss": 0.2873, + "loss/gen": 0.03204440325498581, + "loss/real": 0.2777453064918518, + "rewards/accuracies": 1.0, + "rewards/generated": -680.911376953125, + "rewards/margins": 673.4795532226562, + "rewards/real": -7.431818962097168, + "step": 195 + }, + { + "epoch": 0.41, + "grad_norm": 39.75584359071475, + "learning_rate": 3.6754704203519204e-07, + "logits/generated": 2.313502788543701, + "logits/oppo_generated": -2.8141732215881348, + "logits/oppo_real": -2.7166080474853516, + "logits/real": -0.3117219805717468, + "logps/generated": -692.5143432617188, + "logps/oppo_gen": -85.08738708496094, + "logps/oppo_real": -330.571533203125, + "logps/real": -318.1523742675781, + "loss": 0.353, + "loss/gen": 0.027993863448500633, + "loss/real": 0.19043594598770142, + "rewards/accuracies": 1.0, + "rewards/generated": -607.4268798828125, + "rewards/margins": 619.8460693359375, + "rewards/real": 12.419178009033203, + "step": 196 + }, + { + "epoch": 0.41, + "grad_norm": 30.445406722974546, + "learning_rate": 3.659319055273644e-07, + "logits/generated": 1.316972255706787, + "logits/oppo_generated": -2.779865264892578, + "logits/oppo_real": -2.7747716903686523, + "logits/real": -1.5569334030151367, + "logps/generated": -624.56494140625, + "logps/oppo_gen": -87.4921646118164, + "logps/oppo_real": -331.7750244140625, + "logps/real": -331.03033447265625, + "loss": 0.3985, + "loss/gen": 0.06687048077583313, + "loss/real": 0.3081396222114563, + "rewards/accuracies": 1.0, + "rewards/generated": -537.07275390625, + "rewards/margins": 537.8175048828125, + "rewards/real": 0.7447280883789062, + "step": 197 + }, + { + "epoch": 0.41, + "grad_norm": 32.295338542916106, + "learning_rate": 3.643105808261596e-07, + "logits/generated": 1.9061369895935059, + "logits/oppo_generated": -2.7027125358581543, + "logits/oppo_real": -2.902536392211914, + "logits/real": -0.43068477511405945, + "logps/generated": -520.6417846679688, + "logps/oppo_gen": -55.610694885253906, + "logps/oppo_real": -240.93789672851562, + "logps/real": -245.14500427246094, + "loss": 0.3222, + "loss/gen": 0.1180552989244461, + "loss/real": 0.30835291743278503, + "rewards/accuracies": 0.9375, + "rewards/generated": -465.03106689453125, + "rewards/margins": 460.823974609375, + "rewards/real": -4.207094192504883, + "step": 198 + }, + { + "epoch": 0.42, + "grad_norm": 34.458162896973704, + "learning_rate": 3.626831544743697e-07, + "logits/generated": 2.6215648651123047, + "logits/oppo_generated": -2.5582261085510254, + "logits/oppo_real": -2.8184330463409424, + "logits/real": -1.3916691541671753, + "logps/generated": -576.6791381835938, + "logps/oppo_gen": -63.67759704589844, + "logps/oppo_real": -285.08526611328125, + "logps/real": -287.012939453125, + "loss": 0.4403, + "loss/gen": 0.15199309587478638, + "loss/real": 0.25518083572387695, + "rewards/accuracies": 1.0, + "rewards/generated": -513.00146484375, + "rewards/margins": 511.0738525390625, + "rewards/real": -1.9276609420776367, + "step": 199 + }, + { + "epoch": 0.42, + "grad_norm": 22.067206639869912, + "learning_rate": 3.610497133404795e-07, + "logits/generated": 2.2441086769104004, + "logits/oppo_generated": -2.7718663215637207, + "logits/oppo_real": -2.604949951171875, + "logits/real": -1.599900722503662, + "logps/generated": -721.7673950195312, + "logps/oppo_gen": -86.70479583740234, + "logps/oppo_real": -356.1251220703125, + "logps/real": -336.54754638671875, + "loss": 0.3413, + "loss/gen": 0.0732014924287796, + "loss/real": 0.17360037565231323, + "rewards/accuracies": 1.0, + "rewards/generated": -635.0626220703125, + "rewards/margins": 654.64013671875, + "rewards/real": 19.577552795410156, + "step": 200 + }, + { + "epoch": 0.42, + "grad_norm": 23.872160897538553, + "learning_rate": 3.594103446140297e-07, + "logits/generated": 1.501354455947876, + "logits/oppo_generated": -2.8155646324157715, + "logits/oppo_real": -2.766092538833618, + "logits/real": -1.9259686470031738, + "logps/generated": -654.544189453125, + "logps/oppo_gen": -73.81175231933594, + "logps/oppo_real": -317.19415283203125, + "logps/real": -305.828857421875, + "loss": 0.345, + "loss/gen": 0.13120338320732117, + "loss/real": 0.18511003255844116, + "rewards/accuracies": 1.0, + "rewards/generated": -580.7324829101562, + "rewards/margins": 592.0977783203125, + "rewards/real": 11.36532974243164, + "step": 201 + }, + { + "epoch": 0.42, + "grad_norm": 37.714581376935634, + "learning_rate": 3.5776513580096313e-07, + "logits/generated": 1.715670108795166, + "logits/oppo_generated": -2.750485420227051, + "logits/oppo_real": -2.715085983276367, + "logits/real": -1.2266333103179932, + "logps/generated": -681.1434936523438, + "logps/oppo_gen": -89.23578643798828, + "logps/oppo_real": -368.6956481933594, + "logps/real": -347.9045104980469, + "loss": 0.3083, + "loss/gen": 0.0492560900747776, + "loss/real": 0.1700318455696106, + "rewards/accuracies": 1.0, + "rewards/generated": -591.90771484375, + "rewards/margins": 612.6989135742188, + "rewards/real": 20.7911376953125, + "step": 202 + }, + { + "epoch": 0.42, + "grad_norm": 34.95580007644475, + "learning_rate": 3.5611417471895376e-07, + "logits/generated": 2.565880298614502, + "logits/oppo_generated": -2.6933023929595947, + "logits/oppo_real": -2.7496652603149414, + "logits/real": -0.9516716599464417, + "logps/generated": -719.45947265625, + "logps/oppo_gen": -51.82988739013672, + "logps/oppo_real": -141.92869567871094, + "logps/real": -167.80970764160156, + "loss": 0.3703, + "loss/gen": 0.06500092148780823, + "loss/real": 0.403279185295105, + "rewards/accuracies": 1.0, + "rewards/generated": -667.629638671875, + "rewards/margins": 641.7485961914062, + "rewards/real": -25.881032943725586, + "step": 203 + }, + { + "epoch": 0.43, + "grad_norm": 52.17167377918668, + "learning_rate": 3.5445754949271924e-07, + "logits/generated": 2.1220145225524902, + "logits/oppo_generated": -2.9500808715820312, + "logits/oppo_real": -2.7065343856811523, + "logits/real": -2.146017551422119, + "logps/generated": -680.4698486328125, + "logps/oppo_gen": -90.31026458740234, + "logps/oppo_real": -439.34735107421875, + "logps/real": -405.097900390625, + "loss": 0.43, + "loss/gen": 0.11432987451553345, + "loss/real": 0.14855104684829712, + "rewards/accuracies": 1.0, + "rewards/generated": -590.1595458984375, + "rewards/margins": 624.4090576171875, + "rewards/real": 34.24946594238281, + "step": 204 + }, + { + "epoch": 0.43, + "grad_norm": 42.23183145318973, + "learning_rate": 3.5279534854931674e-07, + "logits/generated": 2.65057373046875, + "logits/oppo_generated": -2.7080841064453125, + "logits/oppo_real": -2.7903661727905273, + "logits/real": -0.8641007542610168, + "logps/generated": -726.29443359375, + "logps/oppo_gen": -78.16770935058594, + "logps/oppo_real": -221.12213134765625, + "logps/real": -214.3006134033203, + "loss": 0.36, + "loss/gen": 0.03191296383738518, + "loss/real": 0.19759435951709747, + "rewards/accuracies": 1.0, + "rewards/generated": -648.126708984375, + "rewards/margins": 654.9482421875, + "rewards/real": 6.82151460647583, + "step": 205 + }, + { + "epoch": 0.43, + "grad_norm": 44.82334166489389, + "learning_rate": 3.511276606134234e-07, + "logits/generated": 2.2213454246520996, + "logits/oppo_generated": -2.7255749702453613, + "logits/oppo_real": -2.866260528564453, + "logits/real": -1.4539179801940918, + "logps/generated": -683.35986328125, + "logps/oppo_gen": -71.52497863769531, + "logps/oppo_real": -245.4722900390625, + "logps/real": -217.90753173828125, + "loss": 0.3083, + "loss/gen": 0.02118522860109806, + "loss/real": 0.1588922142982483, + "rewards/accuracies": 1.0, + "rewards/generated": -611.8348999023438, + "rewards/margins": 639.3995971679688, + "rewards/real": 27.564727783203125, + "step": 206 + }, + { + "epoch": 0.43, + "grad_norm": 89.52327625923607, + "learning_rate": 3.4945457470259987e-07, + "logits/generated": 2.679196834564209, + "logits/oppo_generated": -2.712146759033203, + "logits/oppo_real": -2.5725181102752686, + "logits/real": -1.3774299621582031, + "logps/generated": -809.5963745117188, + "logps/oppo_gen": -95.62235260009766, + "logps/oppo_real": -342.45953369140625, + "logps/real": -347.9728088378906, + "loss": 0.3979, + "loss/gen": 0.012037093751132488, + "loss/real": 0.31207871437072754, + "rewards/accuracies": 0.9375, + "rewards/generated": -713.9739990234375, + "rewards/margins": 708.460693359375, + "rewards/real": -5.513291358947754, + "step": 207 + }, + { + "epoch": 0.44, + "grad_norm": 33.33843385456191, + "learning_rate": 3.4777618012253895e-07, + "logits/generated": 0.3462332487106323, + "logits/oppo_generated": -2.733717441558838, + "logits/oppo_real": -2.6825289726257324, + "logits/real": -1.5442677736282349, + "logps/generated": -749.8042602539062, + "logps/oppo_gen": -92.93001556396484, + "logps/oppo_real": -297.9956359863281, + "logps/real": -312.69561767578125, + "loss": 0.3289, + "loss/gen": 0.025040730834007263, + "loss/real": 0.3518640995025635, + "rewards/accuracies": 0.9375, + "rewards/generated": -656.874267578125, + "rewards/margins": 642.1741943359375, + "rewards/real": -14.699989318847656, + "step": 208 + }, + { + "epoch": 0.44, + "grad_norm": 32.84499135380081, + "learning_rate": 3.4609256646229903e-07, + "logits/generated": 0.9510908126831055, + "logits/oppo_generated": -2.6556482315063477, + "logits/oppo_real": -2.5293190479278564, + "logits/real": -1.8658448457717896, + "logps/generated": -683.0816650390625, + "logps/oppo_gen": -99.02784729003906, + "logps/oppo_real": -402.88189697265625, + "logps/real": -354.7418212890625, + "loss": 0.2956, + "loss/gen": 0.06923627853393555, + "loss/real": 0.1563805490732193, + "rewards/accuracies": 1.0, + "rewards/generated": -584.0538330078125, + "rewards/margins": 632.1939697265625, + "rewards/real": 48.140071868896484, + "step": 209 + }, + { + "epoch": 0.44, + "grad_norm": 49.95857170415053, + "learning_rate": 3.4440382358952115e-07, + "logits/generated": 0.15448346734046936, + "logits/oppo_generated": -2.720691680908203, + "logits/oppo_real": -2.57096529006958, + "logits/real": -1.851238489151001, + "logps/generated": -605.6126708984375, + "logps/oppo_gen": -92.30026245117188, + "logps/oppo_real": -256.4493408203125, + "logps/real": -253.73361206054688, + "loss": 0.3509, + "loss/gen": 0.21256473660469055, + "loss/real": 0.2324458658695221, + "rewards/accuracies": 1.0, + "rewards/generated": -513.3124389648438, + "rewards/margins": 516.0281372070312, + "rewards/real": 2.71573543548584, + "step": 210 + }, + { + "epoch": 0.44, + "grad_norm": 36.08240825591988, + "learning_rate": 3.4271004164563294e-07, + "logits/generated": -0.19886772334575653, + "logits/oppo_generated": -2.9380054473876953, + "logits/oppo_real": -2.9733734130859375, + "logits/real": -2.3697614669799805, + "logps/generated": -543.4454956054688, + "logps/oppo_gen": -72.3819808959961, + "logps/oppo_real": -333.6939697265625, + "logps/real": -319.1031799316406, + "loss": 0.3138, + "loss/gen": 0.3100494146347046, + "loss/real": 0.18639668822288513, + "rewards/accuracies": 1.0, + "rewards/generated": -471.06353759765625, + "rewards/margins": 485.65435791015625, + "rewards/real": 14.590840339660645, + "step": 211 + }, + { + "epoch": 0.44, + "grad_norm": 32.91568040617667, + "learning_rate": 3.410113110410366e-07, + "logits/generated": 0.19806131720542908, + "logits/oppo_generated": -2.8069820404052734, + "logits/oppo_real": -2.969247341156006, + "logits/real": -2.11154842376709, + "logps/generated": -629.618896484375, + "logps/oppo_gen": -69.23855590820312, + "logps/oppo_real": -293.89312744140625, + "logps/real": -266.492431640625, + "loss": 0.3423, + "loss/gen": 0.048031456768512726, + "loss/real": 0.15945301949977875, + "rewards/accuracies": 1.0, + "rewards/generated": -560.38037109375, + "rewards/margins": 587.7811279296875, + "rewards/real": 27.400691986083984, + "step": 212 + }, + { + "epoch": 0.45, + "grad_norm": 38.56999521973007, + "learning_rate": 3.3930772245028317e-07, + "logits/generated": 1.8033071756362915, + "logits/oppo_generated": -2.7724642753601074, + "logits/oppo_real": -2.654409885406494, + "logits/real": -1.6350951194763184, + "logps/generated": -644.2208251953125, + "logps/oppo_gen": -77.64331817626953, + "logps/oppo_real": -251.16159057617188, + "logps/real": -259.20703125, + "loss": 0.3801, + "loss/gen": 0.061860501766204834, + "loss/real": 0.3248833417892456, + "rewards/accuracies": 1.0, + "rewards/generated": -566.5775146484375, + "rewards/margins": 558.5321044921875, + "rewards/real": -8.045448303222656, + "step": 213 + }, + { + "epoch": 0.45, + "grad_norm": 27.540038768156492, + "learning_rate": 3.3759936680723233e-07, + "logits/generated": 1.1407092809677124, + "logits/oppo_generated": -2.727145195007324, + "logits/oppo_real": -2.6046769618988037, + "logits/real": -1.5037585496902466, + "logps/generated": -647.4967041015625, + "logps/oppo_gen": -80.66487121582031, + "logps/oppo_real": -269.0631408691406, + "logps/real": -247.17236328125, + "loss": 0.3, + "loss/gen": 0.13781431317329407, + "loss/real": 0.25224390625953674, + "rewards/accuracies": 1.0, + "rewards/generated": -566.8319091796875, + "rewards/margins": 588.72265625, + "rewards/real": 21.890777587890625, + "step": 214 + }, + { + "epoch": 0.45, + "grad_norm": 34.52746196700262, + "learning_rate": 3.3588633530019866e-07, + "logits/generated": 1.7109321355819702, + "logits/oppo_generated": -2.8297104835510254, + "logits/oppo_real": -2.8279013633728027, + "logits/real": -1.996672511100769, + "logps/generated": -689.519775390625, + "logps/oppo_gen": -81.06492614746094, + "logps/oppo_real": -267.75341796875, + "logps/real": -262.6698913574219, + "loss": 0.3261, + "loss/gen": 0.15056224167346954, + "loss/real": 0.2486058473587036, + "rewards/accuracies": 1.0, + "rewards/generated": -608.454833984375, + "rewards/margins": 613.5383911132812, + "rewards/real": 5.083520889282227, + "step": 215 + }, + { + "epoch": 0.45, + "grad_norm": 30.925401082749215, + "learning_rate": 3.341687193670843e-07, + "logits/generated": 1.0987327098846436, + "logits/oppo_generated": -2.6814069747924805, + "logits/oppo_real": -3.02249813079834, + "logits/real": -1.2881546020507812, + "logps/generated": -674.28515625, + "logps/oppo_gen": -64.46792602539062, + "logps/oppo_real": -302.4736328125, + "logps/real": -339.430419921875, + "loss": 0.3165, + "loss/gen": 0.03633798286318779, + "loss/real": 0.5284652709960938, + "rewards/accuracies": 1.0, + "rewards/generated": -609.8171997070312, + "rewards/margins": 572.8603515625, + "rewards/real": -36.95682907104492, + "step": 216 + }, + { + "epoch": 0.45, + "grad_norm": 50.21994106397129, + "learning_rate": 3.3244661069049806e-07, + "logits/generated": 1.24822998046875, + "logits/oppo_generated": -2.7858104705810547, + "logits/oppo_real": -2.6971521377563477, + "logits/real": -1.740341067314148, + "logps/generated": -610.5947265625, + "logps/oppo_gen": -71.21235656738281, + "logps/oppo_real": -320.8017578125, + "logps/real": -311.95843505859375, + "loss": 0.3234, + "loss/gen": 0.07200797647237778, + "loss/real": 0.25545698404312134, + "rewards/accuracies": 1.0, + "rewards/generated": -539.38232421875, + "rewards/margins": 548.2257080078125, + "rewards/real": 8.84335708618164, + "step": 217 + }, + { + "epoch": 0.46, + "grad_norm": 49.23734271839566, + "learning_rate": 3.3072010119286155e-07, + "logits/generated": 1.5630497932434082, + "logits/oppo_generated": -2.6927084922790527, + "logits/oppo_real": -2.880934715270996, + "logits/real": -1.2201228141784668, + "logps/generated": -623.26708984375, + "logps/oppo_gen": -74.21060180664062, + "logps/oppo_real": -299.62982177734375, + "logps/real": -348.39190673828125, + "loss": 0.4195, + "loss/gen": 0.14255878329277039, + "loss/real": 0.5940058827400208, + "rewards/accuracies": 0.875, + "rewards/generated": -549.0565185546875, + "rewards/margins": 500.2944030761719, + "rewards/real": -48.7620964050293, + "step": 218 + }, + { + "epoch": 0.46, + "grad_norm": 22.2264169591017, + "learning_rate": 3.289892830315028e-07, + "logits/generated": 4.247753620147705, + "logits/oppo_generated": -2.771029472351074, + "logits/oppo_real": -2.5227415561676025, + "logits/real": -1.2131158113479614, + "logps/generated": -623.5982666015625, + "logps/oppo_gen": -62.89678955078125, + "logps/oppo_real": -163.27670288085938, + "logps/real": -179.0076141357422, + "loss": 0.3323, + "loss/gen": 0.07575614750385284, + "loss/real": 0.32401931285858154, + "rewards/accuracies": 1.0, + "rewards/generated": -560.7014770507812, + "rewards/margins": 544.9705810546875, + "rewards/real": -15.730911254882812, + "step": 219 + }, + { + "epoch": 0.46, + "grad_norm": 27.218516790045705, + "learning_rate": 3.272542485937368e-07, + "logits/generated": -0.3621227741241455, + "logits/oppo_generated": -2.86175799369812, + "logits/oppo_real": -2.841768503189087, + "logits/real": -2.256178855895996, + "logps/generated": -679.428466796875, + "logps/oppo_gen": -74.02748107910156, + "logps/oppo_real": -279.630859375, + "logps/real": -263.0206298828125, + "loss": 0.3413, + "loss/gen": 0.11687298119068146, + "loss/real": 0.18065857887268066, + "rewards/accuracies": 1.0, + "rewards/generated": -605.4010620117188, + "rewards/margins": 622.0112915039062, + "rewards/real": 16.610258102416992, + "step": 220 + }, + { + "epoch": 0.46, + "grad_norm": 26.52582141348607, + "learning_rate": 3.2551509049193444e-07, + "logits/generated": -0.09197130799293518, + "logits/oppo_generated": -2.889202356338501, + "logits/oppo_real": -2.841768741607666, + "logits/real": -2.298105001449585, + "logps/generated": -709.23681640625, + "logps/oppo_gen": -96.27604675292969, + "logps/oppo_real": -258.4027099609375, + "logps/real": -264.6490173339844, + "loss": 0.3246, + "loss/gen": 0.029492512345314026, + "loss/real": 0.24392205476760864, + "rewards/accuracies": 1.0, + "rewards/generated": -612.9607543945312, + "rewards/margins": 606.7144775390625, + "rewards/real": -6.246295928955078, + "step": 221 + }, + { + "epoch": 0.46, + "grad_norm": 34.78987391682295, + "learning_rate": 3.2377190155857864e-07, + "logits/generated": 0.3520706295967102, + "logits/oppo_generated": -2.6503279209136963, + "logits/oppo_real": -2.805267333984375, + "logits/real": -1.7184969186782837, + "logps/generated": -657.2587890625, + "logps/oppo_gen": -77.18238067626953, + "logps/oppo_real": -260.9399108886719, + "logps/real": -246.47340393066406, + "loss": 0.2943, + "loss/gen": 0.1653580665588379, + "loss/real": 0.19109240174293518, + "rewards/accuracies": 0.9375, + "rewards/generated": -580.076416015625, + "rewards/margins": 594.5429077148438, + "rewards/real": 14.466522216796875, + "step": 222 + }, + { + "epoch": 0.47, + "grad_norm": 45.26956478798697, + "learning_rate": 3.220247748413094e-07, + "logits/generated": 0.03710488975048065, + "logits/oppo_generated": -2.5938522815704346, + "logits/oppo_real": -2.520634651184082, + "logits/real": -1.9206852912902832, + "logps/generated": -496.4818115234375, + "logps/oppo_gen": -65.14288330078125, + "logps/oppo_real": -270.13726806640625, + "logps/real": -247.87417602539062, + "loss": 0.4573, + "loss/gen": 0.1664436161518097, + "loss/real": 0.16675496101379395, + "rewards/accuracies": 1.0, + "rewards/generated": -431.3388977050781, + "rewards/margins": 453.6019592285156, + "rewards/real": 22.263086318969727, + "step": 223 + }, + { + "epoch": 0.47, + "grad_norm": 39.78434296695749, + "learning_rate": 3.2027380359795706e-07, + "logits/generated": -0.20777527987957, + "logits/oppo_generated": -2.839998245239258, + "logits/oppo_real": -2.971149444580078, + "logits/real": -2.094723701477051, + "logps/generated": -697.270263671875, + "logps/oppo_gen": -77.53987121582031, + "logps/oppo_real": -300.1747741699219, + "logps/real": -289.88665771484375, + "loss": 0.3072, + "loss/gen": 0.017790913581848145, + "loss/real": 0.18654951453208923, + "rewards/accuracies": 1.0, + "rewards/generated": -619.7303466796875, + "rewards/margins": 630.0185546875, + "rewards/real": 10.28813362121582, + "step": 224 + }, + { + "epoch": 0.47, + "grad_norm": 38.24634090698661, + "learning_rate": 3.185190812915646e-07, + "logits/generated": 0.6704794764518738, + "logits/oppo_generated": -2.6874566078186035, + "logits/oppo_real": -2.7883381843566895, + "logits/real": -1.6932792663574219, + "logps/generated": -531.7930908203125, + "logps/oppo_gen": -65.7908935546875, + "logps/oppo_real": -203.2254638671875, + "logps/real": -217.8573455810547, + "loss": 0.3306, + "loss/gen": 0.25845158100128174, + "loss/real": 0.3562762141227722, + "rewards/accuracies": 0.9375, + "rewards/generated": -466.002197265625, + "rewards/margins": 451.37030029296875, + "rewards/real": -14.631880760192871, + "step": 225 + }, + { + "epoch": 0.47, + "grad_norm": 28.64358490228986, + "learning_rate": 3.167607015853983e-07, + "logits/generated": 0.9228378534317017, + "logits/oppo_generated": -2.804375171661377, + "logits/oppo_real": -2.7193827629089355, + "logits/real": -2.1706273555755615, + "logps/generated": -688.7299194335938, + "logps/oppo_gen": -84.42399597167969, + "logps/oppo_real": -334.47344970703125, + "logps/real": -320.31610107421875, + "loss": 0.3595, + "loss/gen": 0.07177238911390305, + "loss/real": 0.17948225140571594, + "rewards/accuracies": 1.0, + "rewards/generated": -604.305908203125, + "rewards/margins": 618.4632568359375, + "rewards/real": 14.157352447509766, + "step": 226 + }, + { + "epoch": 0.47, + "grad_norm": 34.51988819020222, + "learning_rate": 3.149987583379485e-07, + "logits/generated": 1.21856689453125, + "logits/oppo_generated": -2.7994980812072754, + "logits/oppo_real": -2.839235782623291, + "logits/real": -2.0786094665527344, + "logps/generated": -732.9527587890625, + "logps/oppo_gen": -77.09896850585938, + "logps/oppo_real": -191.4404296875, + "logps/real": -195.81341552734375, + "loss": 0.3501, + "loss/gen": 0.14099054038524628, + "loss/real": 0.2566527724266052, + "rewards/accuracies": 0.9375, + "rewards/generated": -655.8538208007812, + "rewards/margins": 651.4808349609375, + "rewards/real": -4.372990131378174, + "step": 227 + }, + { + "epoch": 0.48, + "grad_norm": 68.5395215596447, + "learning_rate": 3.1323334559792015e-07, + "logits/generated": 0.6095637083053589, + "logits/oppo_generated": -2.798116683959961, + "logits/oppo_real": -2.8187661170959473, + "logits/real": -2.233832836151123, + "logps/generated": -621.18408203125, + "logps/oppo_gen": -64.57658386230469, + "logps/oppo_real": -321.273193359375, + "logps/real": -329.33807373046875, + "loss": 0.3941, + "loss/gen": 0.04750348627567291, + "loss/real": 0.34501129388809204, + "rewards/accuracies": 0.9375, + "rewards/generated": -556.607421875, + "rewards/margins": 548.5426025390625, + "rewards/real": -8.064876556396484, + "step": 228 + }, + { + "epoch": 0.48, + "grad_norm": 28.440128012153114, + "learning_rate": 3.114645575992116e-07, + "logits/generated": 1.1552635431289673, + "logits/oppo_generated": -2.8417534828186035, + "logits/oppo_real": -2.915761947631836, + "logits/real": -1.9439736604690552, + "logps/generated": -694.2273559570312, + "logps/oppo_gen": -83.11656188964844, + "logps/oppo_real": -318.304443359375, + "logps/real": -300.11590576171875, + "loss": 0.308, + "loss/gen": 0.08618447184562683, + "loss/real": 0.2467232197523117, + "rewards/accuracies": 1.0, + "rewards/generated": -611.11083984375, + "rewards/margins": 629.29931640625, + "rewards/real": 18.188520431518555, + "step": 229 + }, + { + "epoch": 0.48, + "grad_norm": 42.474537121797304, + "learning_rate": 3.096924887558854e-07, + "logits/generated": 0.5934816002845764, + "logits/oppo_generated": -2.716702461242676, + "logits/oppo_real": -2.8514609336853027, + "logits/real": -1.4103012084960938, + "logps/generated": -645.125244140625, + "logps/oppo_gen": -69.85491943359375, + "logps/oppo_real": -260.97369384765625, + "logps/real": -300.82269287109375, + "loss": 0.3277, + "loss/gen": 0.1709042340517044, + "loss/real": 0.5244534015655518, + "rewards/accuracies": 0.875, + "rewards/generated": -575.270263671875, + "rewards/margins": 535.4212646484375, + "rewards/real": -39.84899139404297, + "step": 230 + }, + { + "epoch": 0.48, + "grad_norm": 37.19761384263697, + "learning_rate": 3.079172336571286e-07, + "logits/generated": 1.9427441358566284, + "logits/oppo_generated": -2.795680522918701, + "logits/oppo_real": -2.8359665870666504, + "logits/real": -1.5810625553131104, + "logps/generated": -670.4180908203125, + "logps/oppo_gen": -72.59526062011719, + "logps/oppo_real": -213.2947998046875, + "logps/real": -207.31228637695312, + "loss": 0.3329, + "loss/gen": 0.07098525762557983, + "loss/real": 0.22547532618045807, + "rewards/accuracies": 1.0, + "rewards/generated": -597.8228759765625, + "rewards/margins": 603.805419921875, + "rewards/real": 5.982503414154053, + "step": 231 + }, + { + "epoch": 0.49, + "grad_norm": 27.7476288753314, + "learning_rate": 3.061388870622033e-07, + "logits/generated": 0.8548299670219421, + "logits/oppo_generated": -2.654226303100586, + "logits/oppo_real": -2.6452994346618652, + "logits/real": -1.6692825555801392, + "logps/generated": -665.5245361328125, + "logps/oppo_gen": -83.74305725097656, + "logps/oppo_real": -318.2536315917969, + "logps/real": -321.6038513183594, + "loss": 0.3093, + "loss/gen": 0.05284074321389198, + "loss/real": 0.29021862149238586, + "rewards/accuracies": 1.0, + "rewards/generated": -581.781494140625, + "rewards/margins": 578.4312133789062, + "rewards/real": -3.350205421447754, + "step": 232 + }, + { + "epoch": 0.49, + "grad_norm": 36.705464684366994, + "learning_rate": 3.0435754389538925e-07, + "logits/generated": 1.5102429389953613, + "logits/oppo_generated": -2.7181339263916016, + "logits/oppo_real": -2.965839385986328, + "logits/real": -1.0968390703201294, + "logps/generated": -595.69189453125, + "logps/oppo_gen": -67.46559143066406, + "logps/oppo_real": -231.64990234375, + "logps/real": -232.36026000976562, + "loss": 0.3101, + "loss/gen": 0.11512690782546997, + "loss/real": 0.2681490182876587, + "rewards/accuracies": 0.9375, + "rewards/generated": -528.226318359375, + "rewards/margins": 527.5159301757812, + "rewards/real": -0.710362434387207, + "step": 233 + }, + { + "epoch": 0.49, + "grad_norm": 25.163564057078393, + "learning_rate": 3.0257329924091654e-07, + "logits/generated": 2.2034385204315186, + "logits/oppo_generated": -2.7173333168029785, + "logits/oppo_real": -2.6980838775634766, + "logits/real": -1.6483628749847412, + "logps/generated": -850.8960571289062, + "logps/oppo_gen": -86.72967529296875, + "logps/oppo_real": -260.89862060546875, + "logps/real": -256.45538330078125, + "loss": 0.3215, + "loss/gen": 0.03136850893497467, + "loss/real": 0.23244068026542664, + "rewards/accuracies": 1.0, + "rewards/generated": -764.1663818359375, + "rewards/margins": 768.609619140625, + "rewards/real": 4.443211555480957, + "step": 234 + }, + { + "epoch": 0.49, + "grad_norm": 30.334419426669847, + "learning_rate": 3.007862483378906e-07, + "logits/generated": 0.15867102146148682, + "logits/oppo_generated": -2.742459774017334, + "logits/oppo_real": -2.9399333000183105, + "logits/real": -2.043656587600708, + "logps/generated": -684.0328369140625, + "logps/oppo_gen": -86.16322326660156, + "logps/oppo_real": -312.35125732421875, + "logps/real": -311.46185302734375, + "loss": 0.2987, + "loss/gen": 0.050678517669439316, + "loss/real": 0.25761735439300537, + "rewards/accuracies": 1.0, + "rewards/generated": -597.86962890625, + "rewards/margins": 598.759033203125, + "rewards/real": 0.8893804550170898, + "step": 235 + }, + { + "epoch": 0.49, + "grad_norm": 25.297935383504495, + "learning_rate": 2.989964865752079e-07, + "logits/generated": 1.1529502868652344, + "logits/oppo_generated": -2.7969439029693604, + "logits/oppo_real": -2.782660484313965, + "logits/real": -2.0899152755737305, + "logps/generated": -697.8406982421875, + "logps/oppo_gen": -88.234375, + "logps/oppo_real": -255.04251098632812, + "logps/real": -238.53860473632812, + "loss": 0.3265, + "loss/gen": 0.04399724677205086, + "loss/real": 0.2120170146226883, + "rewards/accuracies": 1.0, + "rewards/generated": -609.6063232421875, + "rewards/margins": 626.1102294921875, + "rewards/real": 16.50393295288086, + "step": 236 + }, + { + "epoch": 0.5, + "grad_norm": 31.417229307551633, + "learning_rate": 2.97204109486465e-07, + "logits/generated": 1.7922279834747314, + "logits/oppo_generated": -2.7455062866210938, + "logits/oppo_real": -2.7361059188842773, + "logits/real": -1.599406361579895, + "logps/generated": -667.8589477539062, + "logps/oppo_gen": -81.91145324707031, + "logps/oppo_real": -264.510498046875, + "logps/real": -259.50238037109375, + "loss": 0.3382, + "loss/gen": 0.08875064551830292, + "loss/real": 0.21604704856872559, + "rewards/accuracies": 1.0, + "rewards/generated": -585.9474487304688, + "rewards/margins": 590.95556640625, + "rewards/real": 5.00810432434082, + "step": 237 + }, + { + "epoch": 0.5, + "grad_norm": 36.81499852248995, + "learning_rate": 2.954092127448591e-07, + "logits/generated": 0.43488985300064087, + "logits/oppo_generated": -2.644202709197998, + "logits/oppo_real": -2.7387442588806152, + "logits/real": -1.5985989570617676, + "logps/generated": -613.1663818359375, + "logps/oppo_gen": -65.2353744506836, + "logps/oppo_real": -209.83143615722656, + "logps/real": -235.2373046875, + "loss": 0.3054, + "loss/gen": 0.07669935375452042, + "loss/real": 0.38457173109054565, + "rewards/accuracies": 1.0, + "rewards/generated": -547.9310302734375, + "rewards/margins": 522.525146484375, + "rewards/real": -25.405866622924805, + "step": 238 + }, + { + "epoch": 0.5, + "grad_norm": 68.19579535357184, + "learning_rate": 2.9361189215808057e-07, + "logits/generated": 2.575610399246216, + "logits/oppo_generated": -2.8439998626708984, + "logits/oppo_real": -2.853848934173584, + "logits/real": -1.4824180603027344, + "logps/generated": -647.5440063476562, + "logps/oppo_gen": -75.34915161132812, + "logps/oppo_real": -284.36083984375, + "logps/real": -330.95611572265625, + "loss": 0.3659, + "loss/gen": 0.05739718675613403, + "loss/real": 0.6472922563552856, + "rewards/accuracies": 0.9375, + "rewards/generated": -572.1948852539062, + "rewards/margins": 525.599609375, + "rewards/real": -46.595272064208984, + "step": 239 + }, + { + "epoch": 0.5, + "grad_norm": 47.93619874782131, + "learning_rate": 2.9181224366319943e-07, + "logits/generated": 0.38327261805534363, + "logits/oppo_generated": -2.768341064453125, + "logits/oppo_real": -2.7443935871124268, + "logits/real": -1.9321752786636353, + "logps/generated": -622.6148681640625, + "logps/oppo_gen": -68.6933822631836, + "logps/oppo_real": -232.75717163085938, + "logps/real": -214.53204345703125, + "loss": 0.3302, + "loss/gen": 0.0978286862373352, + "loss/real": 0.20635342597961426, + "rewards/accuracies": 1.0, + "rewards/generated": -553.9214477539062, + "rewards/margins": 572.1466064453125, + "rewards/real": 18.225135803222656, + "step": 240 + }, + { + "epoch": 0.5, + "grad_norm": 30.52824219927089, + "learning_rate": 2.900103633215447e-07, + "logits/generated": 0.9182009100914001, + "logits/oppo_generated": -2.757927417755127, + "logits/oppo_real": -2.7806620597839355, + "logits/real": -1.9723321199417114, + "logps/generated": -639.5178833007812, + "logps/oppo_gen": -86.59483337402344, + "logps/oppo_real": -236.5889434814453, + "logps/real": -256.336181640625, + "loss": 0.2817, + "loss/gen": 0.08502039313316345, + "loss/real": 0.28419214487075806, + "rewards/accuracies": 0.9375, + "rewards/generated": -552.9230346679688, + "rewards/margins": 533.1757202148438, + "rewards/real": -19.747272491455078, + "step": 241 + }, + { + "epoch": 0.51, + "grad_norm": 39.09951839687015, + "learning_rate": 2.882063473135763e-07, + "logits/generated": 1.0408952236175537, + "logits/oppo_generated": -2.827584743499756, + "logits/oppo_real": -2.9590084552764893, + "logits/real": -2.2290215492248535, + "logps/generated": -623.004638671875, + "logps/oppo_gen": -68.2485580444336, + "logps/oppo_real": -362.74945068359375, + "logps/real": -344.467529296875, + "loss": 0.3063, + "loss/gen": 0.09477907419204712, + "loss/real": 0.18900375068187714, + "rewards/accuracies": 1.0, + "rewards/generated": -554.756103515625, + "rewards/margins": 573.0380859375, + "rewards/real": 18.281963348388672, + "step": 242 + }, + { + "epoch": 0.51, + "grad_norm": 45.05031908877006, + "learning_rate": 2.864002919337512e-07, + "logits/generated": 0.8644614219665527, + "logits/oppo_generated": -2.783228874206543, + "logits/oppo_real": -2.742513656616211, + "logits/real": -1.9242509603500366, + "logps/generated": -637.9912719726562, + "logps/oppo_gen": -65.0790786743164, + "logps/oppo_real": -264.8369445800781, + "logps/real": -255.57125854492188, + "loss": 0.3486, + "loss/gen": 0.12847480177879333, + "loss/real": 0.24175365269184113, + "rewards/accuracies": 1.0, + "rewards/generated": -572.9121704101562, + "rewards/margins": 582.1778564453125, + "rewards/real": 9.265676498413086, + "step": 243 + }, + { + "epoch": 0.51, + "grad_norm": 36.0462298788176, + "learning_rate": 2.8459229358538404e-07, + "logits/generated": -0.3217124342918396, + "logits/oppo_generated": -2.7591960430145264, + "logits/oppo_real": -2.847045660018921, + "logits/real": -2.0825626850128174, + "logps/generated": -639.609375, + "logps/oppo_gen": -79.41316986083984, + "logps/oppo_real": -322.44171142578125, + "logps/real": -304.0206298828125, + "loss": 0.2817, + "loss/gen": 0.08697028458118439, + "loss/real": 0.18020153045654297, + "rewards/accuracies": 1.0, + "rewards/generated": -560.1961669921875, + "rewards/margins": 578.6173706054688, + "rewards/real": 18.421127319335938, + "step": 244 + }, + { + "epoch": 0.51, + "grad_norm": 23.098431475342515, + "learning_rate": 2.827824487755007e-07, + "logits/generated": 0.6684847474098206, + "logits/oppo_generated": -3.039127826690674, + "logits/oppo_real": -2.9222187995910645, + "logits/real": -2.522883892059326, + "logps/generated": -825.6904296875, + "logps/oppo_gen": -92.08659362792969, + "logps/oppo_real": -440.71002197265625, + "logps/real": -405.4791259765625, + "loss": 0.252, + "loss/gen": 0.0202939473092556, + "loss/real": 0.14877164363861084, + "rewards/accuracies": 1.0, + "rewards/generated": -733.6038208007812, + "rewards/margins": 768.834716796875, + "rewards/real": 35.23091506958008, + "step": 245 + }, + { + "epoch": 0.51, + "grad_norm": 32.593987819722024, + "learning_rate": 2.8097085410968694e-07, + "logits/generated": 0.48746663331985474, + "logits/oppo_generated": -2.485867977142334, + "logits/oppo_real": -2.4769599437713623, + "logits/real": -1.8404998779296875, + "logps/generated": -625.856201171875, + "logps/oppo_gen": -90.03643798828125, + "logps/oppo_real": -255.79519653320312, + "logps/real": -247.8499298095703, + "loss": 0.4168, + "loss/gen": 0.13854430615901947, + "loss/real": 0.21315725147724152, + "rewards/accuracies": 1.0, + "rewards/generated": -535.81982421875, + "rewards/margins": 543.7650756835938, + "rewards/real": 7.945267677307129, + "step": 246 + }, + { + "epoch": 0.52, + "grad_norm": 36.158667270102754, + "learning_rate": 2.7915760628693253e-07, + "logits/generated": 1.4546080827713013, + "logits/oppo_generated": -2.7927658557891846, + "logits/oppo_real": -2.680619478225708, + "logits/real": -2.005889415740967, + "logps/generated": -741.1942138671875, + "logps/oppo_gen": -81.70547485351562, + "logps/oppo_real": -251.9884033203125, + "logps/real": -246.9957733154297, + "loss": 0.2965, + "loss/gen": 0.0464542955160141, + "loss/real": 0.23268568515777588, + "rewards/accuracies": 1.0, + "rewards/generated": -659.48876953125, + "rewards/margins": 664.4813842773438, + "rewards/real": 4.99260139465332, + "step": 247 + }, + { + "epoch": 0.52, + "grad_norm": 46.582294450252235, + "learning_rate": 2.7734280209446865e-07, + "logits/generated": 2.0774784088134766, + "logits/oppo_generated": -2.685457229614258, + "logits/oppo_real": -2.7742578983306885, + "logits/real": -1.0988413095474243, + "logps/generated": -716.355224609375, + "logps/oppo_gen": -73.68305206298828, + "logps/oppo_real": -247.6187744140625, + "logps/real": -261.7083435058594, + "loss": 0.3984, + "loss/gen": 0.022194834426045418, + "loss/real": 0.318002313375473, + "rewards/accuracies": 1.0, + "rewards/generated": -642.6722412109375, + "rewards/margins": 628.5826416015625, + "rewards/real": -14.089559555053711, + "step": 248 + }, + { + "epoch": 0.52, + "grad_norm": 39.37642916553157, + "learning_rate": 2.755265384026023e-07, + "logits/generated": 2.5619468688964844, + "logits/oppo_generated": -2.7931642532348633, + "logits/oppo_real": -2.6713364124298096, + "logits/real": -1.2989791631698608, + "logps/generated": -589.9859619140625, + "logps/oppo_gen": -74.50320434570312, + "logps/oppo_real": -232.06639099121094, + "logps/real": -266.68707275390625, + "loss": 0.343, + "loss/gen": 0.11266843974590302, + "loss/real": 0.4674380123615265, + "rewards/accuracies": 0.9375, + "rewards/generated": -515.4827880859375, + "rewards/margins": 480.86212158203125, + "rewards/real": -34.620670318603516, + "step": 249 + }, + { + "epoch": 0.52, + "grad_norm": 30.31126366679761, + "learning_rate": 2.7370891215954565e-07, + "logits/generated": -0.13464397192001343, + "logits/oppo_generated": -2.922173500061035, + "logits/oppo_real": -2.915562391281128, + "logits/real": -1.2431890964508057, + "logps/generated": -568.3289184570312, + "logps/oppo_gen": -93.9364013671875, + "logps/oppo_real": -277.7060241699219, + "logps/real": -319.4127197265625, + "loss": 0.3484, + "loss/gen": 0.1920367181301117, + "loss/real": 0.4756568372249603, + "rewards/accuracies": 0.8125, + "rewards/generated": -474.39251708984375, + "rewards/margins": 432.685791015625, + "rewards/real": -41.70672607421875, + "step": 250 + }, + { + "epoch": 0.53, + "grad_norm": 32.90219882469863, + "learning_rate": 2.7189002038624057e-07, + "logits/generated": 0.6835288405418396, + "logits/oppo_generated": -2.8348021507263184, + "logits/oppo_real": -2.6828556060791016, + "logits/real": -1.7274892330169678, + "logps/generated": -670.5845336914062, + "logps/oppo_gen": -77.95462036132812, + "logps/oppo_real": -321.50152587890625, + "logps/real": -326.0620422363281, + "loss": 0.2903, + "loss/gen": 0.0680694729089737, + "loss/real": 0.2676253318786621, + "rewards/accuracies": 1.0, + "rewards/generated": -592.6298828125, + "rewards/margins": 588.0693969726562, + "rewards/real": -4.560503959655762, + "step": 251 + }, + { + "epoch": 0.53, + "grad_norm": 30.531534871048184, + "learning_rate": 2.7006996017118027e-07, + "logits/generated": 1.0771749019622803, + "logits/oppo_generated": -2.705852508544922, + "logits/oppo_real": -2.7847092151641846, + "logits/real": -1.856350064277649, + "logps/generated": -580.4610595703125, + "logps/oppo_gen": -66.31861877441406, + "logps/oppo_real": -250.60986328125, + "logps/real": -234.23973083496094, + "loss": 0.2796, + "loss/gen": 0.09471721947193146, + "loss/real": 0.18563194572925568, + "rewards/accuracies": 1.0, + "rewards/generated": -514.1424560546875, + "rewards/margins": 530.5125732421875, + "rewards/real": 16.370126724243164, + "step": 252 + }, + { + "epoch": 0.53, + "grad_norm": 25.42876102382952, + "learning_rate": 2.682488286652269e-07, + "logits/generated": 1.3248242139816284, + "logits/oppo_generated": -2.7483713626861572, + "logits/oppo_real": -2.829575538635254, + "logits/real": -1.816493034362793, + "logps/generated": -644.23974609375, + "logps/oppo_gen": -80.60096740722656, + "logps/oppo_real": -285.4813232421875, + "logps/real": -277.68682861328125, + "loss": 0.3086, + "loss/gen": 0.06294162571430206, + "loss/real": 0.2093324214220047, + "rewards/accuracies": 1.0, + "rewards/generated": -563.6387939453125, + "rewards/margins": 571.4332275390625, + "rewards/real": 7.794487953186035, + "step": 253 + }, + { + "epoch": 0.53, + "grad_norm": 21.81948661948513, + "learning_rate": 2.6642672307642573e-07, + "logits/generated": 0.9607592821121216, + "logits/oppo_generated": -2.632258653640747, + "logits/oppo_real": -2.55385160446167, + "logits/real": -2.0827624797821045, + "logps/generated": -574.7434692382812, + "logps/oppo_gen": -69.48466491699219, + "logps/oppo_real": -261.5279235839844, + "logps/real": -236.08151245117188, + "loss": 0.3025, + "loss/gen": 0.11658591777086258, + "loss/real": 0.16278226673603058, + "rewards/accuracies": 1.0, + "rewards/generated": -505.25885009765625, + "rewards/margins": 530.7052612304688, + "rewards/real": 25.446413040161133, + "step": 254 + }, + { + "epoch": 0.53, + "grad_norm": 34.6780286900528, + "learning_rate": 2.646037406648165e-07, + "logits/generated": 0.40537479519844055, + "logits/oppo_generated": -2.7576169967651367, + "logits/oppo_real": -2.7069432735443115, + "logits/real": -2.0863590240478516, + "logps/generated": -679.2177734375, + "logps/oppo_gen": -119.10911560058594, + "logps/oppo_real": -359.0958557128906, + "logps/real": -355.48931884765625, + "loss": 0.4468, + "loss/gen": 0.06933214515447617, + "loss/real": 0.28164565563201904, + "rewards/accuracies": 1.0, + "rewards/generated": -560.108642578125, + "rewards/margins": 563.7152099609375, + "rewards/real": 3.6065492630004883, + "step": 255 + }, + { + "epoch": 0.54, + "grad_norm": 23.262374374157027, + "learning_rate": 2.6277997873724176e-07, + "logits/generated": 2.0152463912963867, + "logits/oppo_generated": -2.6790993213653564, + "logits/oppo_real": -2.67695951461792, + "logits/real": -1.8701472282409668, + "logps/generated": -630.0718383789062, + "logps/oppo_gen": -81.55001831054688, + "logps/oppo_real": -244.86224365234375, + "logps/real": -256.7394104003906, + "loss": 0.3128, + "loss/gen": 0.04411861300468445, + "loss/real": 0.38423439860343933, + "rewards/accuracies": 1.0, + "rewards/generated": -548.5217895507812, + "rewards/margins": 536.6446533203125, + "rewards/real": -11.87718391418457, + "step": 256 + }, + { + "epoch": 0.54, + "grad_norm": 26.47014389961653, + "learning_rate": 2.609555346421532e-07, + "logits/generated": 0.46947139501571655, + "logits/oppo_generated": -2.7998907566070557, + "logits/oppo_real": -2.928711414337158, + "logits/real": -1.9949252605438232, + "logps/generated": -624.2977294921875, + "logps/oppo_gen": -78.38945007324219, + "logps/oppo_real": -242.44906616210938, + "logps/real": -243.96652221679688, + "loss": 0.3466, + "loss/gen": 0.06059020385146141, + "loss/real": 0.25872138142585754, + "rewards/accuracies": 0.9375, + "rewards/generated": -545.9083251953125, + "rewards/margins": 544.3908081054688, + "rewards/real": -1.5174579620361328, + "step": 257 + }, + { + "epoch": 0.54, + "grad_norm": 44.01876569598968, + "learning_rate": 2.5913050576441473e-07, + "logits/generated": 0.2901713252067566, + "logits/oppo_generated": -2.3973050117492676, + "logits/oppo_real": -2.54941463470459, + "logits/real": -1.5007007122039795, + "logps/generated": -672.2840576171875, + "logps/oppo_gen": -78.8645248413086, + "logps/oppo_real": -219.2704620361328, + "logps/real": -222.47265625, + "loss": 0.3215, + "loss/gen": 0.05777715891599655, + "loss/real": 0.27797359228134155, + "rewards/accuracies": 1.0, + "rewards/generated": -593.4195556640625, + "rewards/margins": 590.2173461914062, + "rewards/real": -3.202197551727295, + "step": 258 + }, + { + "epoch": 0.54, + "grad_norm": 27.867143695648217, + "learning_rate": 2.5730498952010496e-07, + "logits/generated": 1.1024194955825806, + "logits/oppo_generated": -2.8151259422302246, + "logits/oppo_real": -2.8964319229125977, + "logits/real": -2.0106348991394043, + "logps/generated": -623.4109497070312, + "logps/oppo_gen": -62.03562927246094, + "logps/oppo_real": -338.70721435546875, + "logps/real": -356.89959716796875, + "loss": 0.3348, + "loss/gen": 0.13135670125484467, + "loss/real": 0.4308997392654419, + "rewards/accuracies": 1.0, + "rewards/generated": -561.3753051757812, + "rewards/margins": 543.1829223632812, + "rewards/real": -18.192373275756836, + "step": 259 + }, + { + "epoch": 0.54, + "grad_norm": 43.278825800792156, + "learning_rate": 2.55479083351317e-07, + "logits/generated": 0.6705357432365417, + "logits/oppo_generated": -2.8476147651672363, + "logits/oppo_real": -3.038517713546753, + "logits/real": -2.0954232215881348, + "logps/generated": -606.3067626953125, + "logps/oppo_gen": -65.98280334472656, + "logps/oppo_real": -297.4833984375, + "logps/real": -295.8573913574219, + "loss": 0.347, + "loss/gen": 0.13984668254852295, + "loss/real": 0.2379724085330963, + "rewards/accuracies": 0.9375, + "rewards/generated": -540.323974609375, + "rewards/margins": 541.9500122070312, + "rewards/real": 1.6260404586791992, + "step": 260 + }, + { + "epoch": 0.55, + "grad_norm": 33.369751020993924, + "learning_rate": 2.536528847209573e-07, + "logits/generated": 0.027688533067703247, + "logits/oppo_generated": -2.5906708240509033, + "logits/oppo_real": -2.67025089263916, + "logits/real": -1.7617822885513306, + "logps/generated": -647.2080078125, + "logps/oppo_gen": -73.23574829101562, + "logps/oppo_real": -239.16107177734375, + "logps/real": -228.57687377929688, + "loss": 0.3379, + "loss/gen": 0.24388740956783295, + "loss/real": 0.1990511417388916, + "rewards/accuracies": 0.9375, + "rewards/generated": -573.9722290039062, + "rewards/margins": 584.556396484375, + "rewards/real": 10.584165573120117, + "step": 261 + }, + { + "epoch": 0.55, + "grad_norm": 47.93150019493965, + "learning_rate": 2.5182649110754325e-07, + "logits/generated": 0.09529060125350952, + "logits/oppo_generated": -2.763000011444092, + "logits/oppo_real": -2.8288025856018066, + "logits/real": -2.132340431213379, + "logps/generated": -620.471435546875, + "logps/oppo_gen": -84.12704467773438, + "logps/oppo_real": -251.29183959960938, + "logps/real": -241.84896850585938, + "loss": 0.3577, + "loss/gen": 0.2290661633014679, + "loss/real": 0.21368204057216644, + "rewards/accuracies": 1.0, + "rewards/generated": -536.3443603515625, + "rewards/margins": 545.7872314453125, + "rewards/real": 9.442842483520508, + "step": 262 + }, + { + "epoch": 0.55, + "grad_norm": 75.39146534505745, + "learning_rate": 2.5e-07, + "logits/generated": -0.36547717452049255, + "logits/oppo_generated": -2.7416415214538574, + "logits/oppo_real": -2.677898645401001, + "logits/real": -2.1685986518859863, + "logps/generated": -853.912841796875, + "logps/oppo_gen": -94.32908630371094, + "logps/oppo_real": -302.0242614746094, + "logps/real": -274.1735534667969, + "loss": 0.3866, + "loss/gen": 0.07128161191940308, + "loss/real": 0.1598164141178131, + "rewards/accuracies": 1.0, + "rewards/generated": -759.583740234375, + "rewards/margins": 787.4345092773438, + "rewards/real": 27.850727081298828, + "step": 263 + }, + { + "epoch": 0.55, + "grad_norm": 44.66155824818755, + "learning_rate": 2.4817350889245673e-07, + "logits/generated": -0.10230934619903564, + "logits/oppo_generated": -2.799670696258545, + "logits/oppo_real": -2.7438480854034424, + "logits/real": -2.254028081893921, + "logps/generated": -686.68310546875, + "logps/oppo_gen": -80.21273803710938, + "logps/oppo_real": -327.29644775390625, + "logps/real": -294.9067077636719, + "loss": 0.391, + "loss/gen": 0.07902692258358002, + "loss/real": 0.15174683928489685, + "rewards/accuracies": 1.0, + "rewards/generated": -606.4703369140625, + "rewards/margins": 638.8599853515625, + "rewards/real": 32.38971710205078, + "step": 264 + }, + { + "epoch": 0.55, + "grad_norm": 18.82427994970857, + "learning_rate": 2.463471152790427e-07, + "logits/generated": 0.20839297771453857, + "logits/oppo_generated": -2.7984328269958496, + "logits/oppo_real": -2.7523856163024902, + "logits/real": -2.133162498474121, + "logps/generated": -691.3712768554688, + "logps/oppo_gen": -82.20599365234375, + "logps/oppo_real": -347.56866455078125, + "logps/real": -320.5689697265625, + "loss": 0.2817, + "loss/gen": 0.15159496665000916, + "loss/real": 0.16067659854888916, + "rewards/accuracies": 0.9375, + "rewards/generated": -609.165283203125, + "rewards/margins": 636.1649169921875, + "rewards/real": 26.999671936035156, + "step": 265 + }, + { + "epoch": 0.56, + "grad_norm": 75.24201512349167, + "learning_rate": 2.44520916648683e-07, + "logits/generated": -0.19325079023838043, + "logits/oppo_generated": -2.7985243797302246, + "logits/oppo_real": -2.8215994834899902, + "logits/real": -2.207157611846924, + "logps/generated": -727.372802734375, + "logps/oppo_gen": -86.37100219726562, + "logps/oppo_real": -314.001708984375, + "logps/real": -294.5711975097656, + "loss": 0.4154, + "loss/gen": 0.032481130212545395, + "loss/real": 0.22301779687404633, + "rewards/accuracies": 1.0, + "rewards/generated": -641.0017700195312, + "rewards/margins": 660.4323120117188, + "rewards/real": 19.43051528930664, + "step": 266 + }, + { + "epoch": 0.56, + "grad_norm": 27.517901887978418, + "learning_rate": 2.426950104798951e-07, + "logits/generated": 0.02050509676337242, + "logits/oppo_generated": -2.809068202972412, + "logits/oppo_real": -2.782775402069092, + "logits/real": -2.2482824325561523, + "logps/generated": -662.8475341796875, + "logps/oppo_gen": -91.16156005859375, + "logps/oppo_real": -328.0069580078125, + "logps/real": -295.89288330078125, + "loss": 0.276, + "loss/gen": 0.0662633553147316, + "loss/real": 0.15361186861991882, + "rewards/accuracies": 1.0, + "rewards/generated": -571.6859741210938, + "rewards/margins": 603.800048828125, + "rewards/real": 32.11410140991211, + "step": 267 + }, + { + "epoch": 0.56, + "grad_norm": 58.34852643477276, + "learning_rate": 2.4086949423558525e-07, + "logits/generated": 0.4598902761936188, + "logits/oppo_generated": -2.640536308288574, + "logits/oppo_real": -2.6269845962524414, + "logits/real": -1.9713550806045532, + "logps/generated": -609.9164428710938, + "logps/oppo_gen": -77.98777770996094, + "logps/oppo_real": -400.0408935546875, + "logps/real": -374.6983642578125, + "loss": 0.3319, + "loss/gen": 0.17593906819820404, + "loss/real": 0.16186195611953735, + "rewards/accuracies": 1.0, + "rewards/generated": -531.9286499023438, + "rewards/margins": 557.2711791992188, + "rewards/real": 25.342538833618164, + "step": 268 + }, + { + "epoch": 0.56, + "grad_norm": 43.833663030237815, + "learning_rate": 2.3904446535784686e-07, + "logits/generated": -0.2348947525024414, + "logits/oppo_generated": -2.701582431793213, + "logits/oppo_real": -2.8296256065368652, + "logits/real": -2.0177409648895264, + "logps/generated": -653.5576171875, + "logps/oppo_gen": -91.32444763183594, + "logps/oppo_real": -384.9336853027344, + "logps/real": -351.2134094238281, + "loss": 0.3148, + "loss/gen": 0.1337149441242218, + "loss/real": 0.15142256021499634, + "rewards/accuracies": 1.0, + "rewards/generated": -562.233154296875, + "rewards/margins": 595.9534912109375, + "rewards/real": 33.720272064208984, + "step": 269 + }, + { + "epoch": 0.56, + "grad_norm": 29.247056601036263, + "learning_rate": 2.3722002126275822e-07, + "logits/generated": -0.4930816888809204, + "logits/oppo_generated": -2.8453965187072754, + "logits/oppo_real": -2.7912707328796387, + "logits/real": -2.370246410369873, + "logps/generated": -680.9036865234375, + "logps/oppo_gen": -103.50727844238281, + "logps/oppo_real": -434.1189270019531, + "logps/real": -401.18719482421875, + "loss": 0.3351, + "loss/gen": 0.15950866043567657, + "loss/real": 0.15047958493232727, + "rewards/accuracies": 1.0, + "rewards/generated": -577.3963623046875, + "rewards/margins": 610.328125, + "rewards/real": 32.93174362182617, + "step": 270 + }, + { + "epoch": 0.57, + "grad_norm": 29.402288480169258, + "learning_rate": 2.353962593351835e-07, + "logits/generated": 1.3464157581329346, + "logits/oppo_generated": -2.815722942352295, + "logits/oppo_real": -2.8115828037261963, + "logits/real": -1.9343657493591309, + "logps/generated": -742.6785888671875, + "logps/oppo_gen": -90.67955780029297, + "logps/oppo_real": -319.91876220703125, + "logps/real": -318.8854064941406, + "loss": 0.31, + "loss/gen": 0.030156534165143967, + "loss/real": 0.26410186290740967, + "rewards/accuracies": 1.0, + "rewards/generated": -651.9990234375, + "rewards/margins": 653.032470703125, + "rewards/real": 1.033339500427246, + "step": 271 + }, + { + "epoch": 0.57, + "grad_norm": 32.55452345021061, + "learning_rate": 2.3357327692357428e-07, + "logits/generated": 1.7179875373840332, + "logits/oppo_generated": -2.6796679496765137, + "logits/oppo_real": -2.7324979305267334, + "logits/real": -1.4703872203826904, + "logps/generated": -692.619384765625, + "logps/oppo_gen": -64.20418548583984, + "logps/oppo_real": -228.74159240722656, + "logps/real": -244.6279296875, + "loss": 0.3772, + "loss/gen": 0.03423958644270897, + "loss/real": 0.4124412536621094, + "rewards/accuracies": 1.0, + "rewards/generated": -628.4152221679688, + "rewards/margins": 612.5289306640625, + "rewards/real": -15.886341094970703, + "step": 272 + }, + { + "epoch": 0.57, + "grad_norm": 27.68333221135452, + "learning_rate": 2.317511713347731e-07, + "logits/generated": -0.3925975561141968, + "logits/oppo_generated": -2.623629093170166, + "logits/oppo_real": -2.891550064086914, + "logits/real": -1.9939806461334229, + "logps/generated": -675.5734252929688, + "logps/oppo_gen": -77.46165466308594, + "logps/oppo_real": -376.2138366699219, + "logps/real": -352.5907897949219, + "loss": 0.3106, + "loss/gen": 0.11591622233390808, + "loss/real": 0.16552887856960297, + "rewards/accuracies": 1.0, + "rewards/generated": -598.11181640625, + "rewards/margins": 621.7348022460938, + "rewards/real": 23.623062133789062, + "step": 273 + }, + { + "epoch": 0.57, + "grad_norm": 64.98134122216354, + "learning_rate": 2.2993003982881973e-07, + "logits/generated": 0.04800446331501007, + "logits/oppo_generated": -2.7032546997070312, + "logits/oppo_real": -2.7032618522644043, + "logits/real": -1.8638030290603638, + "logps/generated": -693.61572265625, + "logps/oppo_gen": -91.1432876586914, + "logps/oppo_real": -297.7861633300781, + "logps/real": -308.7113037109375, + "loss": 0.3949, + "loss/gen": 0.04603281617164612, + "loss/real": 0.28950807452201843, + "rewards/accuracies": 1.0, + "rewards/generated": -602.472412109375, + "rewards/margins": 591.5473022460938, + "rewards/real": -10.925130844116211, + "step": 274 + }, + { + "epoch": 0.58, + "grad_norm": 39.85445015635801, + "learning_rate": 2.2810997961375938e-07, + "logits/generated": 2.8605520725250244, + "logits/oppo_generated": -2.9404988288879395, + "logits/oppo_real": -2.5591325759887695, + "logits/real": -1.763561725616455, + "logps/generated": -583.0770263671875, + "logps/oppo_gen": -56.471839904785156, + "logps/oppo_real": -241.88677978515625, + "logps/real": -262.81781005859375, + "loss": 0.3835, + "loss/gen": 0.06709041446447372, + "loss/real": 0.4117059111595154, + "rewards/accuracies": 1.0, + "rewards/generated": -526.605224609375, + "rewards/margins": 505.6741943359375, + "rewards/real": -20.931013107299805, + "step": 275 + }, + { + "epoch": 0.58, + "grad_norm": 26.09332545500441, + "learning_rate": 2.2629108784045436e-07, + "logits/generated": 0.5841866731643677, + "logits/oppo_generated": -2.6106820106506348, + "logits/oppo_real": -2.7232418060302734, + "logits/real": -1.913360834121704, + "logps/generated": -659.1812744140625, + "logps/oppo_gen": -76.83047485351562, + "logps/oppo_real": -281.0586242675781, + "logps/real": -261.4032287597656, + "loss": 0.2957, + "loss/gen": 0.025033961981534958, + "loss/real": 0.1760440468788147, + "rewards/accuracies": 1.0, + "rewards/generated": -582.3507690429688, + "rewards/margins": 602.0061645507812, + "rewards/real": 19.655406951904297, + "step": 276 + }, + { + "epoch": 0.58, + "grad_norm": 38.50617594539219, + "learning_rate": 2.2447346159739768e-07, + "logits/generated": -0.026605768129229546, + "logits/oppo_generated": -2.732983112335205, + "logits/oppo_real": -2.733703136444092, + "logits/real": -2.2098536491394043, + "logps/generated": -640.650634765625, + "logps/oppo_gen": -102.6632080078125, + "logps/oppo_real": -385.54461669921875, + "logps/real": -364.49481201171875, + "loss": 0.36, + "loss/gen": 0.17120496928691864, + "loss/real": 0.178038090467453, + "rewards/accuracies": 1.0, + "rewards/generated": -537.9873657226562, + "rewards/margins": 559.0371704101562, + "rewards/real": 21.049800872802734, + "step": 277 + }, + { + "epoch": 0.58, + "grad_norm": 30.374145998154212, + "learning_rate": 2.2265719790553146e-07, + "logits/generated": 0.42899081110954285, + "logits/oppo_generated": -2.741022825241089, + "logits/oppo_real": -2.7121999263763428, + "logits/real": -2.1624093055725098, + "logps/generated": -740.4405517578125, + "logps/oppo_gen": -90.87716674804688, + "logps/oppo_real": -406.1678161621094, + "logps/real": -392.99517822265625, + "loss": 0.3204, + "loss/gen": 0.020876560360193253, + "loss/real": 0.19476071000099182, + "rewards/accuracies": 1.0, + "rewards/generated": -649.5633544921875, + "rewards/margins": 662.7359619140625, + "rewards/real": 13.172629356384277, + "step": 278 + }, + { + "epoch": 0.58, + "grad_norm": 31.19044129103799, + "learning_rate": 2.2084239371306752e-07, + "logits/generated": 1.6656911373138428, + "logits/oppo_generated": -2.8956916332244873, + "logits/oppo_real": -2.856825351715088, + "logits/real": -2.215172052383423, + "logps/generated": -657.0090942382812, + "logps/oppo_gen": -64.58488464355469, + "logps/oppo_real": -211.9491424560547, + "logps/real": -219.18777465820312, + "loss": 0.3131, + "loss/gen": 0.03627927601337433, + "loss/real": 0.4412737786769867, + "rewards/accuracies": 0.9375, + "rewards/generated": -592.4242553710938, + "rewards/margins": 585.1856689453125, + "rewards/real": -7.238637924194336, + "step": 279 + }, + { + "epoch": 0.59, + "grad_norm": 31.520425622845142, + "learning_rate": 2.19029145890313e-07, + "logits/generated": 0.3855173587799072, + "logits/oppo_generated": -2.7485857009887695, + "logits/oppo_real": -2.783714771270752, + "logits/real": -2.1652963161468506, + "logps/generated": -678.6658935546875, + "logps/oppo_gen": -70.05374145507812, + "logps/oppo_real": -304.0491638183594, + "logps/real": -283.9021911621094, + "loss": 0.3017, + "loss/gen": 0.03154566138982773, + "loss/real": 0.17599177360534668, + "rewards/accuracies": 1.0, + "rewards/generated": -608.6121826171875, + "rewards/margins": 628.7591552734375, + "rewards/real": 20.14695930480957, + "step": 280 + }, + { + "epoch": 0.59, + "grad_norm": 35.63404319158419, + "learning_rate": 2.172175512244993e-07, + "logits/generated": 0.7677186727523804, + "logits/oppo_generated": -2.8187942504882812, + "logits/oppo_real": -2.884047508239746, + "logits/real": -2.267946243286133, + "logps/generated": -705.670654296875, + "logps/oppo_gen": -83.02871704101562, + "logps/oppo_real": -363.13525390625, + "logps/real": -324.23687744140625, + "loss": 0.277, + "loss/gen": 0.04462321102619171, + "loss/real": 0.14266663789749146, + "rewards/accuracies": 1.0, + "rewards/generated": -622.6419067382812, + "rewards/margins": 661.540283203125, + "rewards/real": 38.898380279541016, + "step": 281 + }, + { + "epoch": 0.59, + "grad_norm": 44.425770087321645, + "learning_rate": 2.154077064146159e-07, + "logits/generated": -0.42760705947875977, + "logits/oppo_generated": -2.8119869232177734, + "logits/oppo_real": -3.1372385025024414, + "logits/real": -2.188045024871826, + "logps/generated": -667.8062744140625, + "logps/oppo_gen": -74.51792907714844, + "logps/oppo_real": -328.4604187011719, + "logps/real": -306.0193786621094, + "loss": 0.3065, + "loss/gen": 0.048910096287727356, + "loss/real": 0.18809708952903748, + "rewards/accuracies": 1.0, + "rewards/generated": -593.2882690429688, + "rewards/margins": 615.7293701171875, + "rewards/real": 22.441036224365234, + "step": 282 + }, + { + "epoch": 0.59, + "grad_norm": 24.95744791726447, + "learning_rate": 2.1359970806624884e-07, + "logits/generated": 0.22146156430244446, + "logits/oppo_generated": -2.6513538360595703, + "logits/oppo_real": -2.6379599571228027, + "logits/real": -1.8909337520599365, + "logps/generated": -693.6986694335938, + "logps/oppo_gen": -84.63557434082031, + "logps/oppo_real": -237.87828063964844, + "logps/real": -236.34927368164062, + "loss": 0.3319, + "loss/gen": 0.09527064859867096, + "loss/real": 0.2507627606391907, + "rewards/accuracies": 1.0, + "rewards/generated": -609.0631103515625, + "rewards/margins": 610.5921020507812, + "rewards/real": 1.529006004333496, + "step": 283 + }, + { + "epoch": 0.59, + "grad_norm": 28.732460045285126, + "learning_rate": 2.1179365268642375e-07, + "logits/generated": 0.1767929643392563, + "logits/oppo_generated": -2.7945454120635986, + "logits/oppo_real": -2.902392864227295, + "logits/real": -2.1974804401397705, + "logps/generated": -644.111572265625, + "logps/oppo_gen": -74.51861572265625, + "logps/oppo_real": -385.30194091796875, + "logps/real": -357.007080078125, + "loss": 0.3031, + "loss/gen": 0.05500415712594986, + "loss/real": 0.16526341438293457, + "rewards/accuracies": 1.0, + "rewards/generated": -569.5928955078125, + "rewards/margins": 597.8878173828125, + "rewards/real": 28.294912338256836, + "step": 284 + }, + { + "epoch": 0.6, + "grad_norm": 23.633269623678157, + "learning_rate": 2.0998963667845536e-07, + "logits/generated": -0.31503739953041077, + "logits/oppo_generated": -2.682985782623291, + "logits/oppo_real": -2.8302135467529297, + "logits/real": -2.067976951599121, + "logps/generated": -667.347900390625, + "logps/oppo_gen": -108.3177261352539, + "logps/oppo_real": -433.48614501953125, + "logps/real": -399.8238525390625, + "loss": 0.3811, + "loss/gen": 0.1597834676504135, + "loss/real": 0.1646268665790558, + "rewards/accuracies": 1.0, + "rewards/generated": -559.0302124023438, + "rewards/margins": 592.6925048828125, + "rewards/real": 33.66226577758789, + "step": 285 + }, + { + "epoch": 0.6, + "grad_norm": 34.15444156923445, + "learning_rate": 2.0818775633680055e-07, + "logits/generated": -0.049648359417915344, + "logits/oppo_generated": -2.8504347801208496, + "logits/oppo_real": -2.82558536529541, + "logits/real": -2.302478790283203, + "logps/generated": -782.4987182617188, + "logps/oppo_gen": -89.17402648925781, + "logps/oppo_real": -381.289794921875, + "logps/real": -370.0972900390625, + "loss": 0.323, + "loss/gen": 0.004926434252411127, + "loss/real": 0.2192765325307846, + "rewards/accuracies": 1.0, + "rewards/generated": -693.32470703125, + "rewards/margins": 704.5172119140625, + "rewards/real": 11.192514419555664, + "step": 286 + }, + { + "epoch": 0.6, + "grad_norm": 34.53213198817063, + "learning_rate": 2.0638810784191946e-07, + "logits/generated": 0.74876868724823, + "logits/oppo_generated": -2.820481777191162, + "logits/oppo_real": -2.9427828788757324, + "logits/real": -2.221226215362549, + "logps/generated": -807.7759399414062, + "logps/oppo_gen": -93.68537902832031, + "logps/oppo_real": -462.7519836425781, + "logps/real": -431.6222839355469, + "loss": 0.3505, + "loss/gen": 0.00832824781537056, + "loss/real": 0.15834376215934753, + "rewards/accuracies": 1.0, + "rewards/generated": -714.090576171875, + "rewards/margins": 745.2202758789062, + "rewards/real": 31.129711151123047, + "step": 287 + }, + { + "epoch": 0.6, + "grad_norm": 28.628067052187287, + "learning_rate": 2.0459078725514089e-07, + "logits/generated": 1.6910818815231323, + "logits/oppo_generated": -2.6388208866119385, + "logits/oppo_real": -2.623079538345337, + "logits/real": -1.9768972396850586, + "logps/generated": -802.6104736328125, + "logps/oppo_gen": -63.859310150146484, + "logps/oppo_real": -286.3548889160156, + "logps/real": -259.7296142578125, + "loss": 0.4453, + "loss/gen": 0.025440678000450134, + "loss/real": 0.1741112470626831, + "rewards/accuracies": 1.0, + "rewards/generated": -738.7510986328125, + "rewards/margins": 765.3764038085938, + "rewards/real": 26.625244140625, + "step": 288 + }, + { + "epoch": 0.6, + "grad_norm": 58.26362627303912, + "learning_rate": 2.027958905135349e-07, + "logits/generated": 0.4634339213371277, + "logits/oppo_generated": -2.668745517730713, + "logits/oppo_real": -2.7086033821105957, + "logits/real": -2.0303690433502197, + "logps/generated": -743.83544921875, + "logps/oppo_gen": -85.07185363769531, + "logps/oppo_real": -261.210205078125, + "logps/real": -244.32540893554688, + "loss": 0.3133, + "loss/gen": 0.026636935770511627, + "loss/real": 0.1779983788728714, + "rewards/accuracies": 1.0, + "rewards/generated": -658.7635498046875, + "rewards/margins": 675.6484375, + "rewards/real": 16.884801864624023, + "step": 289 + }, + { + "epoch": 0.61, + "grad_norm": 29.702995544825736, + "learning_rate": 2.0100351342479216e-07, + "logits/generated": 0.3994565010070801, + "logits/oppo_generated": -2.7590723037719727, + "logits/oppo_real": -2.840005874633789, + "logits/real": -2.039400100708008, + "logps/generated": -675.1010131835938, + "logps/oppo_gen": -75.13490295410156, + "logps/oppo_real": -314.7492370605469, + "logps/real": -311.7596435546875, + "loss": 0.3358, + "loss/gen": 0.09637254476547241, + "loss/real": 0.2566570043563843, + "rewards/accuracies": 0.9375, + "rewards/generated": -599.9661865234375, + "rewards/margins": 602.9556884765625, + "rewards/real": 2.989558219909668, + "step": 290 + }, + { + "epoch": 0.61, + "grad_norm": 58.494534750719446, + "learning_rate": 1.9921375166210945e-07, + "logits/generated": 0.22771090269088745, + "logits/oppo_generated": -2.8160781860351562, + "logits/oppo_real": -2.926997184753418, + "logits/real": -1.7282150983810425, + "logps/generated": -617.6701049804688, + "logps/oppo_gen": -77.46833801269531, + "logps/oppo_real": -310.57672119140625, + "logps/real": -323.9449462890625, + "loss": 0.3855, + "loss/gen": 0.17424950003623962, + "loss/real": 0.40698057413101196, + "rewards/accuracies": 0.875, + "rewards/generated": -540.2017822265625, + "rewards/margins": 526.8335571289062, + "rewards/real": -13.368255615234375, + "step": 291 + }, + { + "epoch": 0.61, + "grad_norm": 34.476149123683136, + "learning_rate": 1.9742670075908349e-07, + "logits/generated": 0.3010374903678894, + "logits/oppo_generated": -2.6675429344177246, + "logits/oppo_real": -2.6383228302001953, + "logits/real": -1.7668535709381104, + "logps/generated": -607.7005615234375, + "logps/oppo_gen": -62.1541748046875, + "logps/oppo_real": -173.223388671875, + "logps/real": -186.780029296875, + "loss": 0.3663, + "loss/gen": 0.16208486258983612, + "loss/real": 0.2849530577659607, + "rewards/accuracies": 1.0, + "rewards/generated": -545.54638671875, + "rewards/margins": 531.98974609375, + "rewards/real": -13.556650161743164, + "step": 292 + }, + { + "epoch": 0.61, + "grad_norm": 36.1219215005324, + "learning_rate": 1.9564245610461078e-07, + "logits/generated": -0.7002210021018982, + "logits/oppo_generated": -2.663135528564453, + "logits/oppo_real": -2.7156057357788086, + "logits/real": -1.9735496044158936, + "logps/generated": -620.8001098632812, + "logps/oppo_gen": -81.85824584960938, + "logps/oppo_real": -176.90597534179688, + "logps/real": -168.76197814941406, + "loss": 0.3097, + "loss/gen": 0.1982150822877884, + "loss/real": 0.19656775891780853, + "rewards/accuracies": 1.0, + "rewards/generated": -538.94189453125, + "rewards/margins": 547.0858764648438, + "rewards/real": 8.144001007080078, + "step": 293 + }, + { + "epoch": 0.62, + "grad_norm": 38.90704849833205, + "learning_rate": 1.938611129377967e-07, + "logits/generated": -0.833325982093811, + "logits/oppo_generated": -2.8089256286621094, + "logits/oppo_real": -3.007702112197876, + "logits/real": -2.1018872261047363, + "logps/generated": -722.702392578125, + "logps/oppo_gen": -64.71053314208984, + "logps/oppo_real": -203.855224609375, + "logps/real": -210.36614990234375, + "loss": 0.3285, + "loss/gen": 0.09308800101280212, + "loss/real": 0.2549854516983032, + "rewards/accuracies": 1.0, + "rewards/generated": -657.9918823242188, + "rewards/margins": 651.48095703125, + "rewards/real": -6.510924339294434, + "step": 294 + }, + { + "epoch": 0.62, + "grad_norm": 67.77022011455327, + "learning_rate": 1.920827663428714e-07, + "logits/generated": -0.717969536781311, + "logits/oppo_generated": -2.7657203674316406, + "logits/oppo_real": -2.7387871742248535, + "logits/real": -2.205482006072998, + "logps/generated": -609.5360717773438, + "logps/oppo_gen": -70.42521667480469, + "logps/oppo_real": -208.49960327148438, + "logps/real": -188.78085327148438, + "loss": 0.2982, + "loss/gen": 0.0571298748254776, + "loss/real": 0.17368248105049133, + "rewards/accuracies": 1.0, + "rewards/generated": -539.1109008789062, + "rewards/margins": 558.82958984375, + "rewards/real": 19.718734741210938, + "step": 295 + }, + { + "epoch": 0.62, + "grad_norm": 51.58006802989649, + "learning_rate": 1.9030751124411448e-07, + "logits/generated": 0.027949482202529907, + "logits/oppo_generated": -2.8746414184570312, + "logits/oppo_real": -2.7726399898529053, + "logits/real": -2.319335460662842, + "logps/generated": -620.2716674804688, + "logps/oppo_gen": -74.33796691894531, + "logps/oppo_real": -340.18280029296875, + "logps/real": -341.213134765625, + "loss": 0.4032, + "loss/gen": 0.15857474505901337, + "loss/real": 0.33517903089523315, + "rewards/accuracies": 1.0, + "rewards/generated": -545.9337158203125, + "rewards/margins": 544.9034423828125, + "rewards/real": -1.030303955078125, + "step": 296 + }, + { + "epoch": 0.62, + "grad_norm": 27.70785924367286, + "learning_rate": 1.8853544240078836e-07, + "logits/generated": 0.41290417313575745, + "logits/oppo_generated": -2.5701441764831543, + "logits/oppo_real": -2.689274311065674, + "logits/real": -1.8879668712615967, + "logps/generated": -677.0875244140625, + "logps/oppo_gen": -80.02645874023438, + "logps/oppo_real": -254.08285522460938, + "logps/real": -222.24395751953125, + "loss": 0.3388, + "loss/gen": 0.09368044137954712, + "loss/real": 0.15713843703269958, + "rewards/accuracies": 1.0, + "rewards/generated": -597.06103515625, + "rewards/margins": 628.8999633789062, + "rewards/real": 31.83889389038086, + "step": 297 + }, + { + "epoch": 0.62, + "grad_norm": 23.666014899650275, + "learning_rate": 1.8676665440207977e-07, + "logits/generated": -0.5900826454162598, + "logits/oppo_generated": -2.7572181224823, + "logits/oppo_real": -2.813715696334839, + "logits/real": -2.052342414855957, + "logps/generated": -579.000732421875, + "logps/oppo_gen": -73.36943817138672, + "logps/oppo_real": -282.2958679199219, + "logps/real": -263.3672180175781, + "loss": 0.2496, + "loss/gen": 0.19655288755893707, + "loss/real": 0.1768965721130371, + "rewards/accuracies": 0.9375, + "rewards/generated": -505.6312255859375, + "rewards/margins": 524.5599365234375, + "rewards/real": 18.92863655090332, + "step": 298 + }, + { + "epoch": 0.63, + "grad_norm": 30.369390275780255, + "learning_rate": 1.850012416620515e-07, + "logits/generated": 0.3133728504180908, + "logits/oppo_generated": -2.6170716285705566, + "logits/oppo_real": -2.797962188720703, + "logits/real": -1.5772523880004883, + "logps/generated": -602.629638671875, + "logps/oppo_gen": -57.24889373779297, + "logps/oppo_real": -146.9036865234375, + "logps/real": -174.32469177246094, + "loss": 0.2935, + "loss/gen": 0.060614556074142456, + "loss/real": 0.3480251729488373, + "rewards/accuracies": 1.0, + "rewards/generated": -545.3807373046875, + "rewards/margins": 517.9597778320312, + "rewards/real": -27.420988082885742, + "step": 299 + }, + { + "epoch": 0.63, + "grad_norm": 42.82485707844292, + "learning_rate": 1.8323929841460178e-07, + "logits/generated": 1.209000825881958, + "logits/oppo_generated": -2.8536508083343506, + "logits/oppo_real": -2.8496203422546387, + "logits/real": -2.18961238861084, + "logps/generated": -718.6183471679688, + "logps/oppo_gen": -65.0604476928711, + "logps/oppo_real": -244.0458526611328, + "logps/real": -218.15106201171875, + "loss": 0.3998, + "loss/gen": 0.022837601602077484, + "loss/real": 0.16522105038166046, + "rewards/accuracies": 1.0, + "rewards/generated": -653.557861328125, + "rewards/margins": 679.45263671875, + "rewards/real": 25.894800186157227, + "step": 300 + }, + { + "epoch": 0.63, + "grad_norm": 24.93317607211261, + "learning_rate": 1.8148091870843552e-07, + "logits/generated": 0.9332234263420105, + "logits/oppo_generated": -2.770528793334961, + "logits/oppo_real": -2.8234052658081055, + "logits/real": -2.085968494415283, + "logps/generated": -644.9743041992188, + "logps/oppo_gen": -78.03842163085938, + "logps/oppo_real": -221.58538818359375, + "logps/real": -202.37620544433594, + "loss": 0.2559, + "loss/gen": 0.16635200381278992, + "loss/real": 0.1705245077610016, + "rewards/accuracies": 1.0, + "rewards/generated": -566.9358520507812, + "rewards/margins": 586.14501953125, + "rewards/real": 19.20915412902832, + "step": 301 + }, + { + "epoch": 0.63, + "grad_norm": 26.63787215432487, + "learning_rate": 1.7972619640204294e-07, + "logits/generated": 1.1756725311279297, + "logits/oppo_generated": -2.713470935821533, + "logits/oppo_real": -2.8412275314331055, + "logits/real": -2.0337021350860596, + "logps/generated": -819.2066650390625, + "logps/oppo_gen": -77.62155151367188, + "logps/oppo_real": -245.73736572265625, + "logps/real": -243.28323364257812, + "loss": 0.2739, + "loss/gen": 0.032348792999982834, + "loss/real": 0.2555205523967743, + "rewards/accuracies": 1.0, + "rewards/generated": -741.5850830078125, + "rewards/margins": 744.0391845703125, + "rewards/real": 2.454113006591797, + "step": 302 + }, + { + "epoch": 0.63, + "grad_norm": 41.888609903510016, + "learning_rate": 1.779752251586906e-07, + "logits/generated": 0.9290653467178345, + "logits/oppo_generated": -2.5343804359436035, + "logits/oppo_real": -2.5752387046813965, + "logits/real": -0.9488723278045654, + "logps/generated": -612.5504150390625, + "logps/oppo_gen": -86.0159912109375, + "logps/oppo_real": -196.22686767578125, + "logps/real": -231.4734344482422, + "loss": 0.3814, + "loss/gen": 0.2599642276763916, + "loss/real": 0.3707355856895447, + "rewards/accuracies": 0.875, + "rewards/generated": -526.534423828125, + "rewards/margins": 491.287841796875, + "rewards/real": -35.24655532836914, + "step": 303 + }, + { + "epoch": 0.64, + "grad_norm": 44.957284956980274, + "learning_rate": 1.7622809844142137e-07, + "logits/generated": 0.5650679469108582, + "logits/oppo_generated": -2.718064308166504, + "logits/oppo_real": -2.7589216232299805, + "logits/real": -1.9737653732299805, + "logps/generated": -726.685791015625, + "logps/oppo_gen": -87.50894165039062, + "logps/oppo_real": -237.13243103027344, + "logps/real": -236.24334716796875, + "loss": 0.3701, + "loss/gen": 0.10715784132480621, + "loss/real": 0.2832576632499695, + "rewards/accuracies": 1.0, + "rewards/generated": -639.1767578125, + "rewards/margins": 640.0657958984375, + "rewards/real": 0.8890562057495117, + "step": 304 + }, + { + "epoch": 0.64, + "grad_norm": 31.1571266410647, + "learning_rate": 1.7448490950806548e-07, + "logits/generated": 0.5373824834823608, + "logits/oppo_generated": -2.698071002960205, + "logits/oppo_real": -2.8672518730163574, + "logits/real": -1.855992317199707, + "logps/generated": -874.5352783203125, + "logps/oppo_gen": -63.30276107788086, + "logps/oppo_real": -260.84515380859375, + "logps/real": -251.75595092773438, + "loss": 0.2728, + "loss/gen": 0.1308051496744156, + "loss/real": 0.2010972499847412, + "rewards/accuracies": 1.0, + "rewards/generated": -811.2324829101562, + "rewards/margins": 820.3217163085938, + "rewards/real": 9.089208602905273, + "step": 305 + }, + { + "epoch": 0.64, + "grad_norm": 29.924091179351734, + "learning_rate": 1.7274575140626315e-07, + "logits/generated": 2.4331858158111572, + "logits/oppo_generated": -2.5791516304016113, + "logits/oppo_real": -2.4137372970581055, + "logits/real": -1.069727897644043, + "logps/generated": -593.718994140625, + "logps/oppo_gen": -62.98029708862305, + "logps/oppo_real": -136.33953857421875, + "logps/real": -150.31948852539062, + "loss": 0.3726, + "loss/gen": 0.06495876610279083, + "loss/real": 0.28633958101272583, + "rewards/accuracies": 0.9375, + "rewards/generated": -530.7386474609375, + "rewards/margins": 516.7587280273438, + "rewards/real": -13.979955673217773, + "step": 306 + }, + { + "epoch": 0.64, + "grad_norm": 55.12626885231872, + "learning_rate": 1.7101071696849718e-07, + "logits/generated": 0.914715588092804, + "logits/oppo_generated": -2.8061037063598633, + "logits/oppo_real": -2.7929673194885254, + "logits/real": -1.7518253326416016, + "logps/generated": -656.9697875976562, + "logps/oppo_gen": -62.40575408935547, + "logps/oppo_real": -266.6053161621094, + "logps/real": -290.57000732421875, + "loss": 0.4364, + "loss/gen": 0.0519477054476738, + "loss/real": 0.4437403678894043, + "rewards/accuracies": 1.0, + "rewards/generated": -594.5640869140625, + "rewards/margins": 570.5994262695312, + "rewards/real": -23.96465301513672, + "step": 307 + }, + { + "epoch": 0.64, + "grad_norm": 37.763811628992116, + "learning_rate": 1.692798988071385e-07, + "logits/generated": 1.7852463722229004, + "logits/oppo_generated": -2.7456932067871094, + "logits/oppo_real": -2.709989309310913, + "logits/real": -1.7187665700912476, + "logps/generated": -818.27587890625, + "logps/oppo_gen": -61.88515853881836, + "logps/oppo_real": -247.7135467529297, + "logps/real": -256.1959228515625, + "loss": 0.3584, + "loss/gen": 0.023114312440156937, + "loss/real": 0.313579261302948, + "rewards/accuracies": 0.9375, + "rewards/generated": -756.3906860351562, + "rewards/margins": 747.9083251953125, + "rewards/real": -8.482393264770508, + "step": 308 + }, + { + "epoch": 0.65, + "grad_norm": 38.874349999590024, + "learning_rate": 1.6755338930950192e-07, + "logits/generated": -0.14025013148784637, + "logits/oppo_generated": -2.773202419281006, + "logits/oppo_real": -2.869076728820801, + "logits/real": -2.143603563308716, + "logps/generated": -687.7984008789062, + "logps/oppo_gen": -77.8241195678711, + "logps/oppo_real": -341.8100280761719, + "logps/real": -303.91571044921875, + "loss": 0.2404, + "loss/gen": 0.0828854888677597, + "loss/real": 0.1460573673248291, + "rewards/accuracies": 1.0, + "rewards/generated": -609.9743041992188, + "rewards/margins": 647.86865234375, + "rewards/real": 37.89434814453125, + "step": 309 + }, + { + "epoch": 0.65, + "grad_norm": 24.508315563054946, + "learning_rate": 1.6583128063291573e-07, + "logits/generated": 0.9043882489204407, + "logits/oppo_generated": -2.831587791442871, + "logits/oppo_real": -2.906121253967285, + "logits/real": -2.295949697494507, + "logps/generated": -736.5703125, + "logps/oppo_gen": -70.72431945800781, + "logps/oppo_real": -306.6643981933594, + "logps/real": -273.4691162109375, + "loss": 0.2844, + "loss/gen": 0.03372867777943611, + "loss/real": 0.14928202331066132, + "rewards/accuracies": 1.0, + "rewards/generated": -665.8460693359375, + "rewards/margins": 699.041259765625, + "rewards/real": 33.19529724121094, + "step": 310 + }, + { + "epoch": 0.65, + "grad_norm": 25.9435328074503, + "learning_rate": 1.6411366469980134e-07, + "logits/generated": -0.18510423600673676, + "logits/oppo_generated": -2.8098537921905518, + "logits/oppo_real": -2.916290521621704, + "logits/real": -2.3596861362457275, + "logps/generated": -769.0659790039062, + "logps/oppo_gen": -89.95899963378906, + "logps/oppo_real": -356.05450439453125, + "logps/real": -323.37353515625, + "loss": 0.3019, + "loss/gen": 0.023290330544114113, + "loss/real": 0.15135399997234344, + "rewards/accuracies": 1.0, + "rewards/generated": -679.10693359375, + "rewards/margins": 711.7879638671875, + "rewards/real": 32.680999755859375, + "step": 311 + }, + { + "epoch": 0.65, + "grad_norm": 38.68965658114824, + "learning_rate": 1.6240063319276764e-07, + "logits/generated": 0.11848394572734833, + "logits/oppo_generated": -2.7743453979492188, + "logits/oppo_real": -2.6186952590942383, + "logits/real": -1.9814376831054688, + "logps/generated": -694.65185546875, + "logps/oppo_gen": -78.62828063964844, + "logps/oppo_real": -246.07437133789062, + "logps/real": -253.11190795898438, + "loss": 0.3493, + "loss/gen": 0.07453414052724838, + "loss/real": 0.318087637424469, + "rewards/accuracies": 0.9375, + "rewards/generated": -616.0235595703125, + "rewards/margins": 608.9860229492188, + "rewards/real": -7.037555694580078, + "step": 312 + }, + { + "epoch": 0.65, + "grad_norm": 38.516396734154824, + "learning_rate": 1.606922775497168e-07, + "logits/generated": -0.19997042417526245, + "logits/oppo_generated": -2.8177504539489746, + "logits/oppo_real": -2.7674360275268555, + "logits/real": -2.163430690765381, + "logps/generated": -664.20068359375, + "logps/oppo_gen": -76.41138458251953, + "logps/oppo_real": -294.525634765625, + "logps/real": -269.033935546875, + "loss": 0.2597, + "loss/gen": 0.042983490973711014, + "loss/real": 0.16210412979125977, + "rewards/accuracies": 1.0, + "rewards/generated": -587.789306640625, + "rewards/margins": 613.281005859375, + "rewards/real": 25.491722106933594, + "step": 313 + }, + { + "epoch": 0.66, + "grad_norm": 25.751603456220085, + "learning_rate": 1.5898868895896332e-07, + "logits/generated": 0.7687809467315674, + "logits/oppo_generated": -2.704318046569824, + "logits/oppo_real": -2.5222792625427246, + "logits/real": -2.074397325515747, + "logps/generated": -562.7529296875, + "logps/oppo_gen": -56.72496032714844, + "logps/oppo_real": -257.3275451660156, + "logps/real": -243.65383911132812, + "loss": 0.2979, + "loss/gen": 0.06320726126432419, + "loss/real": 0.2052139937877655, + "rewards/accuracies": 1.0, + "rewards/generated": -506.0279235839844, + "rewards/margins": 519.70166015625, + "rewards/real": 13.673715591430664, + "step": 314 + }, + { + "epoch": 0.66, + "grad_norm": 19.927441011666236, + "learning_rate": 1.572899583543671e-07, + "logits/generated": -0.32504546642303467, + "logits/oppo_generated": -2.8547208309173584, + "logits/oppo_real": -3.0012588500976562, + "logits/real": -2.420483112335205, + "logps/generated": -661.608642578125, + "logps/oppo_gen": -89.96110534667969, + "logps/oppo_real": -324.48052978515625, + "logps/real": -324.35284423828125, + "loss": 0.302, + "loss/gen": 0.042295072227716446, + "loss/real": 0.25394028425216675, + "rewards/accuracies": 1.0, + "rewards/generated": -571.6475830078125, + "rewards/margins": 571.7752685546875, + "rewards/real": 0.12767601013183594, + "step": 315 + }, + { + "epoch": 0.66, + "grad_norm": 47.51428907197944, + "learning_rate": 1.5559617641047885e-07, + "logits/generated": -0.6027528643608093, + "logits/oppo_generated": -2.6286120414733887, + "logits/oppo_real": -2.6878955364227295, + "logits/real": -2.110302448272705, + "logps/generated": -716.825439453125, + "logps/oppo_gen": -84.58811950683594, + "logps/oppo_real": -344.87701416015625, + "logps/real": -320.06689453125, + "loss": 0.3296, + "loss/gen": 0.036832332611083984, + "loss/real": 0.18904441595077515, + "rewards/accuracies": 1.0, + "rewards/generated": -632.2373046875, + "rewards/margins": 657.04736328125, + "rewards/real": 24.81012725830078, + "step": 316 + }, + { + "epoch": 0.66, + "grad_norm": 37.112358432704056, + "learning_rate": 1.5390743353770108e-07, + "logits/generated": -0.28523945808410645, + "logits/oppo_generated": -2.688891887664795, + "logits/oppo_real": -2.580460548400879, + "logits/real": -2.2062671184539795, + "logps/generated": -614.7840576171875, + "logps/oppo_gen": -74.83438110351562, + "logps/oppo_real": -342.4462890625, + "logps/real": -363.833984375, + "loss": 0.3025, + "loss/gen": 0.03874469920992851, + "loss/real": 0.3939588665962219, + "rewards/accuracies": 0.9375, + "rewards/generated": -539.94970703125, + "rewards/margins": 518.5621337890625, + "rewards/real": -21.387653350830078, + "step": 317 + }, + { + "epoch": 0.67, + "grad_norm": 19.430635374697687, + "learning_rate": 1.5222381987746102e-07, + "logits/generated": 0.6181946396827698, + "logits/oppo_generated": -2.5959243774414062, + "logits/oppo_real": -2.7141470909118652, + "logits/real": -1.8983464241027832, + "logps/generated": -678.6424560546875, + "logps/oppo_gen": -63.21453857421875, + "logps/oppo_real": -254.0829620361328, + "logps/real": -251.00709533691406, + "loss": 0.2389, + "loss/gen": 0.03314092755317688, + "loss/real": 0.27147579193115234, + "rewards/accuracies": 1.0, + "rewards/generated": -615.4279174804688, + "rewards/margins": 618.5037841796875, + "rewards/real": 3.075847625732422, + "step": 318 + }, + { + "epoch": 0.67, + "grad_norm": 49.08751352813666, + "learning_rate": 1.5054542529740008e-07, + "logits/generated": 0.9991955161094666, + "logits/oppo_generated": -2.704498052597046, + "logits/oppo_real": -2.7787587642669678, + "logits/real": -2.1585323810577393, + "logps/generated": -607.084228515625, + "logps/oppo_gen": -58.10862731933594, + "logps/oppo_real": -242.32754516601562, + "logps/real": -258.293701171875, + "loss": 0.3173, + "loss/gen": 0.08082648366689682, + "loss/real": 0.38607895374298096, + "rewards/accuracies": 0.9375, + "rewards/generated": -548.9755859375, + "rewards/margins": 533.0094604492188, + "rewards/real": -15.966143608093262, + "step": 319 + }, + { + "epoch": 0.67, + "grad_norm": 55.92749343108785, + "learning_rate": 1.488723393865766e-07, + "logits/generated": -0.43444064259529114, + "logits/oppo_generated": -2.8027491569519043, + "logits/oppo_real": -2.7032413482666016, + "logits/real": -2.063422203063965, + "logps/generated": -628.6464233398438, + "logps/oppo_gen": -73.10235595703125, + "logps/oppo_real": -248.4849853515625, + "logps/real": -226.80807495117188, + "loss": 0.3594, + "loss/gen": 0.17635869979858398, + "loss/real": 0.16784903407096863, + "rewards/accuracies": 1.0, + "rewards/generated": -555.5441284179688, + "rewards/margins": 577.2210693359375, + "rewards/real": 21.676923751831055, + "step": 320 + }, + { + "epoch": 0.67, + "grad_norm": 25.093725677136405, + "learning_rate": 1.472046514506832e-07, + "logits/generated": 1.7986483573913574, + "logits/oppo_generated": -2.8599579334259033, + "logits/oppo_real": -2.651264190673828, + "logits/real": -2.081268787384033, + "logps/generated": -733.37744140625, + "logps/oppo_gen": -88.89913940429688, + "logps/oppo_real": -204.5109405517578, + "logps/real": -176.57421875, + "loss": 0.313, + "loss/gen": 0.029537349939346313, + "loss/real": 0.1576533019542694, + "rewards/accuracies": 1.0, + "rewards/generated": -644.478271484375, + "rewards/margins": 672.4150390625, + "rewards/real": 27.93672752380371, + "step": 321 + }, + { + "epoch": 0.67, + "grad_norm": 37.83629481170328, + "learning_rate": 1.4554245050728084e-07, + "logits/generated": 0.15214265882968903, + "logits/oppo_generated": -2.7803587913513184, + "logits/oppo_real": -2.8964691162109375, + "logits/real": -2.03551983833313, + "logps/generated": -604.7355346679688, + "logps/oppo_gen": -65.7453384399414, + "logps/oppo_real": -243.90567016601562, + "logps/real": -237.9889678955078, + "loss": 0.3164, + "loss/gen": 0.10329093784093857, + "loss/real": 0.22754207253456116, + "rewards/accuracies": 1.0, + "rewards/generated": -538.990234375, + "rewards/margins": 544.9068603515625, + "rewards/real": 5.916713714599609, + "step": 322 + }, + { + "epoch": 0.68, + "grad_norm": 38.617218320086835, + "learning_rate": 1.4388582528104627e-07, + "logits/generated": 0.374458372592926, + "logits/oppo_generated": -2.3750133514404297, + "logits/oppo_real": -2.4892232418060303, + "logits/real": -1.7002949714660645, + "logps/generated": -648.1957397460938, + "logps/oppo_gen": -89.67547607421875, + "logps/oppo_real": -299.82208251953125, + "logps/real": -309.90496826171875, + "loss": 0.3357, + "loss/gen": 0.21774569153785706, + "loss/real": 0.3294346332550049, + "rewards/accuracies": 1.0, + "rewards/generated": -558.520263671875, + "rewards/margins": 548.4373779296875, + "rewards/real": -10.082886695861816, + "step": 323 + }, + { + "epoch": 0.68, + "grad_norm": 64.62729686440552, + "learning_rate": 1.422348641990369e-07, + "logits/generated": 0.43694326281547546, + "logits/oppo_generated": -2.5319583415985107, + "logits/oppo_real": -2.6248130798339844, + "logits/real": -1.630281686782837, + "logps/generated": -490.3382873535156, + "logps/oppo_gen": -65.50686645507812, + "logps/oppo_real": -199.474853515625, + "logps/real": -189.73822021484375, + "loss": 0.3563, + "loss/gen": 0.37786510586738586, + "loss/real": 0.21018007397651672, + "rewards/accuracies": 1.0, + "rewards/generated": -424.8314208984375, + "rewards/margins": 434.56805419921875, + "rewards/real": 9.736638069152832, + "step": 324 + }, + { + "epoch": 0.68, + "grad_norm": 62.558780952147316, + "learning_rate": 1.4058965538597032e-07, + "logits/generated": 0.40667301416397095, + "logits/oppo_generated": -2.3989098072052, + "logits/oppo_real": -2.523897171020508, + "logits/real": -1.7344365119934082, + "logps/generated": -598.157470703125, + "logps/oppo_gen": -62.83578109741211, + "logps/oppo_real": -293.08514404296875, + "logps/real": -234.18716430664062, + "loss": 0.3112, + "loss/gen": 0.15413987636566162, + "loss/real": 0.14755874872207642, + "rewards/accuracies": 1.0, + "rewards/generated": -535.3216552734375, + "rewards/margins": 594.2197265625, + "rewards/real": 58.897987365722656, + "step": 325 + }, + { + "epoch": 0.68, + "grad_norm": 20.831581876247345, + "learning_rate": 1.3895028665952057e-07, + "logits/generated": -0.0944506824016571, + "logits/oppo_generated": -2.80672025680542, + "logits/oppo_real": -2.7922816276550293, + "logits/real": -2.3034706115722656, + "logps/generated": -768.2994384765625, + "logps/oppo_gen": -82.2605972290039, + "logps/oppo_real": -276.97515869140625, + "logps/real": -247.0251922607422, + "loss": 0.2874, + "loss/gen": 0.016872398555278778, + "loss/real": 0.153561532497406, + "rewards/accuracies": 1.0, + "rewards/generated": -686.038818359375, + "rewards/margins": 715.9888305664062, + "rewards/real": 29.94999122619629, + "step": 326 + }, + { + "epoch": 0.68, + "grad_norm": 45.22126591134972, + "learning_rate": 1.3731684552563027e-07, + "logits/generated": -0.44738227128982544, + "logits/oppo_generated": -2.735718250274658, + "logits/oppo_real": -2.7187423706054688, + "logits/real": -2.1731090545654297, + "logps/generated": -783.983154296875, + "logps/oppo_gen": -86.40644836425781, + "logps/oppo_real": -320.1417236328125, + "logps/real": -296.46685791015625, + "loss": 0.341, + "loss/gen": 0.11403728276491165, + "loss/real": 0.1636250913143158, + "rewards/accuracies": 1.0, + "rewards/generated": -697.57666015625, + "rewards/margins": 721.25146484375, + "rewards/real": 23.674882888793945, + "step": 327 + }, + { + "epoch": 0.69, + "grad_norm": 38.26118574954176, + "learning_rate": 1.3568941917384036e-07, + "logits/generated": 0.22915993630886078, + "logits/oppo_generated": -2.673722267150879, + "logits/oppo_real": -2.721848964691162, + "logits/real": -1.9900763034820557, + "logps/generated": -759.6171264648438, + "logps/oppo_gen": -78.38516235351562, + "logps/oppo_real": -173.74859619140625, + "logps/real": -193.63247680664062, + "loss": 0.3319, + "loss/gen": 0.11070828139781952, + "loss/real": 0.3567718267440796, + "rewards/accuracies": 1.0, + "rewards/generated": -681.23193359375, + "rewards/margins": 661.3480834960938, + "rewards/real": -19.883878707885742, + "step": 328 + }, + { + "epoch": 0.69, + "grad_norm": 29.444253264312355, + "learning_rate": 1.3406809447263568e-07, + "logits/generated": -0.4770505428314209, + "logits/oppo_generated": -2.7386703491210938, + "logits/oppo_real": -2.7967844009399414, + "logits/real": -2.254525899887085, + "logps/generated": -660.37744140625, + "logps/oppo_gen": -91.2105712890625, + "logps/oppo_real": -351.5929870605469, + "logps/real": -323.23577880859375, + "loss": 0.3176, + "loss/gen": 0.12337271124124527, + "loss/real": 0.15685990452766418, + "rewards/accuracies": 1.0, + "rewards/generated": -569.1668701171875, + "rewards/margins": 597.5240478515625, + "rewards/real": 28.35721206665039, + "step": 329 + }, + { + "epoch": 0.69, + "grad_norm": 31.31519761333944, + "learning_rate": 1.3245295796480788e-07, + "logits/generated": 0.2074773609638214, + "logits/oppo_generated": -2.814140796661377, + "logits/oppo_real": -2.840679883956909, + "logits/real": -2.250474452972412, + "logps/generated": -677.9022216796875, + "logps/oppo_gen": -80.07135009765625, + "logps/oppo_real": -288.6474304199219, + "logps/real": -282.9855041503906, + "loss": 0.3606, + "loss/gen": 0.05122773349285126, + "loss/real": 0.22256958484649658, + "rewards/accuracies": 1.0, + "rewards/generated": -597.8309326171875, + "rewards/margins": 603.492919921875, + "rewards/real": 5.661938190460205, + "step": 330 + }, + { + "epoch": 0.69, + "grad_norm": 35.277156676851455, + "learning_rate": 1.3084409586283694e-07, + "logits/generated": 0.09115093946456909, + "logits/oppo_generated": -2.589115858078003, + "logits/oppo_real": -2.5021204948425293, + "logits/real": -2.0292158126831055, + "logps/generated": -757.2649536132812, + "logps/oppo_gen": -103.41107177734375, + "logps/oppo_real": -336.0791320800781, + "logps/real": -308.29779052734375, + "loss": 0.2886, + "loss/gen": 0.0286662969738245, + "loss/real": 0.15769243240356445, + "rewards/accuracies": 1.0, + "rewards/generated": -653.8538818359375, + "rewards/margins": 681.6351928710938, + "rewards/real": 27.78131103515625, + "step": 331 + }, + { + "epoch": 0.69, + "grad_norm": 39.39202106064358, + "learning_rate": 1.2924159404428801e-07, + "logits/generated": 0.2052621841430664, + "logits/oppo_generated": -2.5180981159210205, + "logits/oppo_real": -2.463376045227051, + "logits/real": -1.8674006462097168, + "logps/generated": -663.0791015625, + "logps/oppo_gen": -77.07872009277344, + "logps/oppo_real": -306.44830322265625, + "logps/real": -267.9314270019531, + "loss": 0.304, + "loss/gen": 0.15333253145217896, + "loss/real": 0.17717936635017395, + "rewards/accuracies": 0.9375, + "rewards/generated": -586.0004272460938, + "rewards/margins": 624.517333984375, + "rewards/real": 38.516883850097656, + "step": 332 + }, + { + "epoch": 0.7, + "grad_norm": 26.922403234080672, + "learning_rate": 1.2764553804722867e-07, + "logits/generated": 0.24261078238487244, + "logits/oppo_generated": -2.7135229110717773, + "logits/oppo_real": -2.7584338188171387, + "logits/real": -2.0417680740356445, + "logps/generated": -574.452880859375, + "logps/oppo_gen": -63.32374572753906, + "logps/oppo_real": -163.67233276367188, + "logps/real": -160.25054931640625, + "loss": 0.3333, + "loss/gen": 0.17099058628082275, + "loss/real": 0.23388735949993134, + "rewards/accuracies": 1.0, + "rewards/generated": -511.129150390625, + "rewards/margins": 514.5509033203125, + "rewards/real": 3.4218015670776367, + "step": 333 + }, + { + "epoch": 0.7, + "grad_norm": 27.14057664971226, + "learning_rate": 1.2605601306566204e-07, + "logits/generated": 0.07767532020807266, + "logits/oppo_generated": -2.9221487045288086, + "logits/oppo_real": -3.001983165740967, + "logits/real": -2.4487013816833496, + "logps/generated": -646.2496337890625, + "logps/oppo_gen": -80.82681274414062, + "logps/oppo_real": -307.2742919921875, + "logps/real": -280.507080078125, + "loss": 0.2789, + "loss/gen": 0.042075518518686295, + "loss/real": 0.15917706489562988, + "rewards/accuracies": 1.0, + "rewards/generated": -565.4228515625, + "rewards/margins": 592.1900634765625, + "rewards/real": 26.767179489135742, + "step": 334 + }, + { + "epoch": 0.7, + "grad_norm": 27.50873995525388, + "learning_rate": 1.2447310394498017e-07, + "logits/generated": 0.4118821918964386, + "logits/oppo_generated": -2.5506510734558105, + "logits/oppo_real": -2.6206767559051514, + "logits/real": -1.6342732906341553, + "logps/generated": -726.49755859375, + "logps/oppo_gen": -65.73345947265625, + "logps/oppo_real": -223.48028564453125, + "logps/real": -207.2204132080078, + "loss": 0.3043, + "loss/gen": 0.024275805801153183, + "loss/real": 0.2104792445898056, + "rewards/accuracies": 1.0, + "rewards/generated": -660.76416015625, + "rewards/margins": 677.0239868164062, + "rewards/real": 16.259868621826172, + "step": 335 + }, + { + "epoch": 0.7, + "grad_norm": 43.03536881159496, + "learning_rate": 1.2289689517743472e-07, + "logits/generated": 0.2103143334388733, + "logits/oppo_generated": -2.7872653007507324, + "logits/oppo_real": -2.8988583087921143, + "logits/real": -2.260883092880249, + "logps/generated": -628.6256103515625, + "logps/oppo_gen": -63.961341857910156, + "logps/oppo_real": -180.2196044921875, + "logps/real": -161.49661254882812, + "loss": 0.274, + "loss/gen": 0.06484713405370712, + "loss/real": 0.17386212944984436, + "rewards/accuracies": 1.0, + "rewards/generated": -564.6642456054688, + "rewards/margins": 583.3872680664062, + "rewards/real": 18.722999572753906, + "step": 336 + }, + { + "epoch": 0.71, + "grad_norm": 35.90554285639614, + "learning_rate": 1.213274708976271e-07, + "logits/generated": 0.4222411513328552, + "logits/oppo_generated": -2.6874637603759766, + "logits/oppo_real": -2.745856761932373, + "logits/real": -1.9721487760543823, + "logps/generated": -692.657958984375, + "logps/oppo_gen": -80.49569702148438, + "logps/oppo_real": -223.6146240234375, + "logps/real": -244.1184844970703, + "loss": 0.385, + "loss/gen": 0.04486394301056862, + "loss/real": 0.4462623596191406, + "rewards/accuracies": 0.9375, + "rewards/generated": -612.1622314453125, + "rewards/margins": 591.6583251953125, + "rewards/real": -20.50387191772461, + "step": 337 + }, + { + "epoch": 0.71, + "grad_norm": 42.34218026380063, + "learning_rate": 1.1976491487801746e-07, + "logits/generated": 0.6293839812278748, + "logits/oppo_generated": -2.5948119163513184, + "logits/oppo_real": -2.5506985187530518, + "logits/real": -1.8280091285705566, + "logps/generated": -798.2767333984375, + "logps/oppo_gen": -65.61744689941406, + "logps/oppo_real": -219.27554321289062, + "logps/real": -223.35015869140625, + "loss": 0.3611, + "loss/gen": 0.14065881073474884, + "loss/real": 0.34547215700149536, + "rewards/accuracies": 1.0, + "rewards/generated": -732.6593017578125, + "rewards/margins": 728.584716796875, + "rewards/real": -4.07459831237793, + "step": 338 + }, + { + "epoch": 0.71, + "grad_norm": 31.3995649368372, + "learning_rate": 1.1820931052445297e-07, + "logits/generated": 1.3286151885986328, + "logits/oppo_generated": -2.7529683113098145, + "logits/oppo_real": -2.6667518615722656, + "logits/real": -1.918984293937683, + "logps/generated": -792.933349609375, + "logps/oppo_gen": -78.03943634033203, + "logps/oppo_real": -245.19813537597656, + "logps/real": -264.14959716796875, + "loss": 0.3132, + "loss/gen": 0.143991157412529, + "loss/real": 0.3856561779975891, + "rewards/accuracies": 0.9375, + "rewards/generated": -714.8939208984375, + "rewards/margins": 695.9423828125, + "rewards/real": -18.951465606689453, + "step": 339 + }, + { + "epoch": 0.71, + "grad_norm": 42.54585646503958, + "learning_rate": 1.1666074087171627e-07, + "logits/generated": 0.2029736191034317, + "logits/oppo_generated": -2.6604325771331787, + "logits/oppo_real": -2.737955093383789, + "logits/real": -2.145906448364258, + "logps/generated": -556.6988525390625, + "logps/oppo_gen": -53.90046691894531, + "logps/oppo_real": -178.48764038085938, + "logps/real": -149.980224609375, + "loss": 0.2652, + "loss/gen": 0.19677889347076416, + "loss/real": 0.15636593103408813, + "rewards/accuracies": 1.0, + "rewards/generated": -502.79840087890625, + "rewards/margins": 531.3058471679688, + "rewards/real": 28.507417678833008, + "step": 340 + }, + { + "epoch": 0.71, + "grad_norm": 26.573972706214345, + "learning_rate": 1.1511928857909264e-07, + "logits/generated": -0.5622884035110474, + "logits/oppo_generated": -2.7561309337615967, + "logits/oppo_real": -2.7139782905578613, + "logits/real": -2.2837305068969727, + "logps/generated": -767.5333862304688, + "logps/oppo_gen": -94.01554870605469, + "logps/oppo_real": -280.5321044921875, + "logps/real": -261.78564453125, + "loss": 0.2716, + "loss/gen": 0.024503124877810478, + "loss/real": 0.17600321769714355, + "rewards/accuracies": 1.0, + "rewards/generated": -673.517822265625, + "rewards/margins": 692.2642822265625, + "rewards/real": 18.7464599609375, + "step": 341 + }, + { + "epoch": 0.72, + "grad_norm": 48.54658583354498, + "learning_rate": 1.1358503592595837e-07, + "logits/generated": 0.8509594202041626, + "logits/oppo_generated": -2.813668727874756, + "logits/oppo_real": -2.8454103469848633, + "logits/real": -2.246967077255249, + "logps/generated": -787.0734252929688, + "logps/oppo_gen": -83.64794158935547, + "logps/oppo_real": -344.2013854980469, + "logps/real": -316.9454345703125, + "loss": 0.3343, + "loss/gen": 0.08634554594755173, + "loss/real": 0.16001561284065247, + "rewards/accuracies": 1.0, + "rewards/generated": -703.425537109375, + "rewards/margins": 730.6814575195312, + "rewards/real": 27.255950927734375, + "step": 342 + }, + { + "epoch": 0.72, + "grad_norm": 26.039506681255023, + "learning_rate": 1.120580648073885e-07, + "logits/generated": 0.20105311274528503, + "logits/oppo_generated": -2.6984949111938477, + "logits/oppo_real": -2.83193302154541, + "logits/real": -1.8173322677612305, + "logps/generated": -666.4171142578125, + "logps/oppo_gen": -75.18173217773438, + "logps/oppo_real": -207.76528930664062, + "logps/real": -187.3746795654297, + "loss": 0.275, + "loss/gen": 0.20881646871566772, + "loss/real": 0.180282324552536, + "rewards/accuracies": 1.0, + "rewards/generated": -591.2354125976562, + "rewards/margins": 611.6259765625, + "rewards/real": 20.390621185302734, + "step": 343 + }, + { + "epoch": 0.72, + "grad_norm": 56.60992918235502, + "learning_rate": 1.1053845672978565e-07, + "logits/generated": 1.0070171356201172, + "logits/oppo_generated": -2.7348833084106445, + "logits/oppo_real": -2.7071237564086914, + "logits/real": -1.6740999221801758, + "logps/generated": -795.6455078125, + "logps/oppo_gen": -76.39961242675781, + "logps/oppo_real": -203.86724853515625, + "logps/real": -246.10214233398438, + "loss": 0.3551, + "loss/gen": 0.02106640115380287, + "loss/real": 0.5011658072471619, + "rewards/accuracies": 1.0, + "rewards/generated": -719.2459106445312, + "rewards/margins": 677.0111083984375, + "rewards/real": -42.23489761352539, + "step": 344 + }, + { + "epoch": 0.72, + "grad_norm": 35.033887751941336, + "learning_rate": 1.090262928065293e-07, + "logits/generated": 0.5341898798942566, + "logits/oppo_generated": -2.7704925537109375, + "logits/oppo_real": -2.7265381813049316, + "logits/real": -1.9755090475082397, + "logps/generated": -791.3928833007812, + "logps/oppo_gen": -85.65669250488281, + "logps/oppo_real": -332.951904296875, + "logps/real": -323.0646667480469, + "loss": 0.3046, + "loss/gen": 0.014689632691442966, + "loss/real": 0.23723618686199188, + "rewards/accuracies": 1.0, + "rewards/generated": -705.7362060546875, + "rewards/margins": 715.6234130859375, + "rewards/real": 9.887211799621582, + "step": 345 + }, + { + "epoch": 0.72, + "grad_norm": 68.98002003524282, + "learning_rate": 1.0752165375364591e-07, + "logits/generated": 0.32197409868240356, + "logits/oppo_generated": -2.6534223556518555, + "logits/oppo_real": -2.706442356109619, + "logits/real": -1.8222732543945312, + "logps/generated": -825.9232788085938, + "logps/oppo_gen": -88.76399993896484, + "logps/oppo_real": -302.44403076171875, + "logps/real": -287.33404541015625, + "loss": 0.2836, + "loss/gen": 0.07158385217189789, + "loss/real": 0.23982387781143188, + "rewards/accuracies": 1.0, + "rewards/generated": -737.1593017578125, + "rewards/margins": 752.269287109375, + "rewards/real": 15.1099853515625, + "step": 346 + }, + { + "epoch": 0.73, + "grad_norm": 49.98578411189776, + "learning_rate": 1.060246198855011e-07, + "logits/generated": 0.27689942717552185, + "logits/oppo_generated": -2.723323345184326, + "logits/oppo_real": -2.7297825813293457, + "logits/real": -1.9392592906951904, + "logps/generated": -671.2532958984375, + "logps/oppo_gen": -64.74773406982422, + "logps/oppo_real": -236.87423706054688, + "logps/real": -226.2437744140625, + "loss": 0.284, + "loss/gen": 0.04255779832601547, + "loss/real": 0.24799221754074097, + "rewards/accuracies": 1.0, + "rewards/generated": -606.5055541992188, + "rewards/margins": 617.135986328125, + "rewards/real": 10.630456924438477, + "step": 347 + }, + { + "epoch": 0.73, + "grad_norm": 26.12380146398467, + "learning_rate": 1.0453527111051183e-07, + "logits/generated": 0.04960054159164429, + "logits/oppo_generated": -2.8397562503814697, + "logits/oppo_real": -2.856273651123047, + "logits/real": -2.319634199142456, + "logps/generated": -784.1358642578125, + "logps/oppo_gen": -91.67137145996094, + "logps/oppo_real": -355.03753662109375, + "logps/real": -327.039794921875, + "loss": 0.3013, + "loss/gen": 0.01843111217021942, + "loss/real": 0.15837496519088745, + "rewards/accuracies": 1.0, + "rewards/generated": -692.4644775390625, + "rewards/margins": 720.462158203125, + "rewards/real": 27.997737884521484, + "step": 348 + }, + { + "epoch": 0.73, + "grad_norm": 48.17637781014934, + "learning_rate": 1.0305368692688174e-07, + "logits/generated": 1.1829630136489868, + "logits/oppo_generated": -2.7997734546661377, + "logits/oppo_real": -2.955216884613037, + "logits/real": -2.1545634269714355, + "logps/generated": -692.6908569335938, + "logps/oppo_gen": -76.2406005859375, + "logps/oppo_real": -269.97576904296875, + "logps/real": -239.1646728515625, + "loss": 0.3035, + "loss/gen": 0.019039634615182877, + "loss/real": 0.15278419852256775, + "rewards/accuracies": 1.0, + "rewards/generated": -616.4503173828125, + "rewards/margins": 647.2613525390625, + "rewards/real": 30.81109619140625, + "step": 349 + }, + { + "epoch": 0.73, + "grad_norm": 38.29671243845547, + "learning_rate": 1.0157994641835734e-07, + "logits/generated": 1.381691813468933, + "logits/oppo_generated": -2.750364303588867, + "logits/oppo_real": -2.6481316089630127, + "logits/real": -2.0888710021972656, + "logps/generated": -985.8902587890625, + "logps/oppo_gen": -67.35143280029297, + "logps/oppo_real": -269.8861083984375, + "logps/real": -248.64231872558594, + "loss": 0.3397, + "loss/gen": 0.00863666832447052, + "loss/real": 0.17384257912635803, + "rewards/accuracies": 1.0, + "rewards/generated": -918.538818359375, + "rewards/margins": 939.7825927734375, + "rewards/real": 21.243799209594727, + "step": 350 + }, + { + "epoch": 0.73, + "grad_norm": 35.82324568824864, + "learning_rate": 1.0011412825000693e-07, + "logits/generated": 0.5441445708274841, + "logits/oppo_generated": -2.8472161293029785, + "logits/oppo_real": -2.733160972595215, + "logits/real": -2.123514175415039, + "logps/generated": -677.7409057617188, + "logps/oppo_gen": -73.3748779296875, + "logps/oppo_real": -268.7559509277344, + "logps/real": -268.14208984375, + "loss": 0.2839, + "loss/gen": 0.022950371727347374, + "loss/real": 0.2343311309814453, + "rewards/accuracies": 1.0, + "rewards/generated": -604.3660888671875, + "rewards/margins": 604.97998046875, + "rewards/real": 0.6138732433319092, + "step": 351 + }, + { + "epoch": 0.74, + "grad_norm": 34.41360403355227, + "learning_rate": 9.865631066402136e-08, + "logits/generated": 1.2953248023986816, + "logits/oppo_generated": -2.5277109146118164, + "logits/oppo_real": -2.492619514465332, + "logits/real": -2.0883584022521973, + "logps/generated": -750.603515625, + "logps/oppo_gen": -61.516632080078125, + "logps/oppo_real": -233.42202758789062, + "logps/real": -203.82994079589844, + "loss": 0.305, + "loss/gen": 0.016972701996564865, + "loss/real": 0.1576637625694275, + "rewards/accuracies": 1.0, + "rewards/generated": -689.0869140625, + "rewards/margins": 718.678955078125, + "rewards/real": 29.592084884643555, + "step": 352 + }, + { + "epoch": 0.74, + "grad_norm": 85.37379322902608, + "learning_rate": 9.720657147553767e-08, + "logits/generated": 1.9441397190093994, + "logits/oppo_generated": -2.8127946853637695, + "logits/oppo_real": -2.8368353843688965, + "logits/real": -1.9017053842544556, + "logps/generated": -745.4407958984375, + "logps/oppo_gen": -76.67695617675781, + "logps/oppo_real": -326.9615783691406, + "logps/real": -336.90020751953125, + "loss": 0.3262, + "loss/gen": 0.009169317781925201, + "loss/real": 0.3471587896347046, + "rewards/accuracies": 1.0, + "rewards/generated": -668.7637939453125, + "rewards/margins": 658.8252563476562, + "rewards/real": -9.938613891601562, + "step": 353 + }, + { + "epoch": 0.74, + "grad_norm": 28.62276106828052, + "learning_rate": 9.57649880684859e-08, + "logits/generated": 0.5683245658874512, + "logits/oppo_generated": -2.7986297607421875, + "logits/oppo_real": -2.700263500213623, + "logits/real": -1.9732091426849365, + "logps/generated": -679.8853759765625, + "logps/oppo_gen": -71.45687103271484, + "logps/oppo_real": -221.93785095214844, + "logps/real": -217.52499389648438, + "loss": 0.2688, + "loss/gen": 0.09031351655721664, + "loss/real": 0.2791171371936798, + "rewards/accuracies": 1.0, + "rewards/generated": -608.4285278320312, + "rewards/margins": 612.84130859375, + "rewards/real": 4.412837982177734, + "step": 354 + }, + { + "epoch": 0.74, + "grad_norm": 35.88830872477833, + "learning_rate": 9.433163739145771e-08, + "logits/generated": -0.6793491244316101, + "logits/oppo_generated": -2.8078927993774414, + "logits/oppo_real": -2.8096060752868652, + "logits/real": -2.194153308868408, + "logps/generated": -783.4717407226562, + "logps/oppo_gen": -86.69573974609375, + "logps/oppo_real": -399.6392822265625, + "logps/real": -386.584716796875, + "loss": 0.2717, + "loss/gen": 0.0438024066388607, + "loss/real": 0.23390790820121765, + "rewards/accuracies": 1.0, + "rewards/generated": -696.7760009765625, + "rewards/margins": 709.83056640625, + "rewards/real": 13.0545654296875, + "step": 355 + }, + { + "epoch": 0.74, + "grad_norm": 38.479150452306634, + "learning_rate": 9.290659595360017e-08, + "logits/generated": 0.6003881692886353, + "logits/oppo_generated": -2.6745800971984863, + "logits/oppo_real": -2.8772976398468018, + "logits/real": -1.8411824703216553, + "logps/generated": -703.7737426757812, + "logps/oppo_gen": -74.71858215332031, + "logps/oppo_real": -277.74951171875, + "logps/real": -292.45513916015625, + "loss": 0.386, + "loss/gen": 0.03084971383213997, + "loss/real": 0.34726041555404663, + "rewards/accuracies": 1.0, + "rewards/generated": -629.05517578125, + "rewards/margins": 614.3494873046875, + "rewards/real": -14.705662727355957, + "step": 356 + }, + { + "epoch": 0.75, + "grad_norm": 20.115341996659616, + "learning_rate": 9.148993982053058e-08, + "logits/generated": 0.06593459844589233, + "logits/oppo_generated": -2.825925350189209, + "logits/oppo_real": -2.854370594024658, + "logits/real": -1.9273500442504883, + "logps/generated": -762.333251953125, + "logps/oppo_gen": -91.21415710449219, + "logps/oppo_real": -272.8486328125, + "logps/real": -282.2558898925781, + "loss": 0.274, + "loss/gen": 0.03180719166994095, + "loss/real": 0.26319044828414917, + "rewards/accuracies": 1.0, + "rewards/generated": -671.1190795898438, + "rewards/margins": 661.7117919921875, + "rewards/real": -9.407278060913086, + "step": 357 + }, + { + "epoch": 0.75, + "grad_norm": 29.17447660968777, + "learning_rate": 9.008174461027723e-08, + "logits/generated": 0.7246966361999512, + "logits/oppo_generated": -2.6950693130493164, + "logits/oppo_real": -2.750147819519043, + "logits/real": -2.081655979156494, + "logps/generated": -689.679443359375, + "logps/oppo_gen": -83.51091003417969, + "logps/oppo_real": -333.22991943359375, + "logps/real": -327.7589416503906, + "loss": 0.2634, + "loss/gen": 0.07645511627197266, + "loss/real": 0.25844529271125793, + "rewards/accuracies": 0.9375, + "rewards/generated": -606.1685791015625, + "rewards/margins": 611.6395263671875, + "rewards/real": 5.470993995666504, + "step": 358 + }, + { + "epoch": 0.75, + "grad_norm": 24.78207384546401, + "learning_rate": 8.868208548924253e-08, + "logits/generated": 0.16621741652488708, + "logits/oppo_generated": -2.6902928352355957, + "logits/oppo_real": -2.733646869659424, + "logits/real": -2.0168347358703613, + "logps/generated": -673.65087890625, + "logps/oppo_gen": -75.88461303710938, + "logps/oppo_real": -268.97918701171875, + "logps/real": -247.7440643310547, + "loss": 0.2872, + "loss/gen": 0.03719499707221985, + "loss/real": 0.188075989484787, + "rewards/accuracies": 1.0, + "rewards/generated": -597.7662353515625, + "rewards/margins": 619.0014038085938, + "rewards/real": 21.235126495361328, + "step": 359 + }, + { + "epoch": 0.75, + "grad_norm": 34.72498853997741, + "learning_rate": 8.729103716819111e-08, + "logits/generated": 0.26208943128585815, + "logits/oppo_generated": -2.67659068107605, + "logits/oppo_real": -2.6717331409454346, + "logits/real": -2.199741840362549, + "logps/generated": -711.75146484375, + "logps/oppo_gen": -72.04103088378906, + "logps/oppo_real": -243.22787475585938, + "logps/real": -215.61843872070312, + "loss": 0.3307, + "loss/gen": 0.23388239741325378, + "loss/real": 0.15823516249656677, + "rewards/accuracies": 1.0, + "rewards/generated": -639.71044921875, + "rewards/margins": 667.31982421875, + "rewards/real": 27.609418869018555, + "step": 360 + }, + { + "epoch": 0.76, + "grad_norm": 26.014443713796403, + "learning_rate": 8.590867389826179e-08, + "logits/generated": -0.7573856115341187, + "logits/oppo_generated": -2.835716962814331, + "logits/oppo_real": -2.741757392883301, + "logits/real": -2.0698163509368896, + "logps/generated": -635.7809448242188, + "logps/oppo_gen": -69.39401245117188, + "logps/oppo_real": -260.70556640625, + "logps/real": -233.06491088867188, + "loss": 0.2933, + "loss/gen": 0.20804080367088318, + "loss/real": 0.15793883800506592, + "rewards/accuracies": 1.0, + "rewards/generated": -566.3869018554688, + "rewards/margins": 594.0275268554688, + "rewards/real": 27.640644073486328, + "step": 361 + }, + { + "epoch": 0.76, + "grad_norm": 34.53451094613999, + "learning_rate": 8.453506946700417e-08, + "logits/generated": 0.09254428744316101, + "logits/oppo_generated": -2.665210008621216, + "logits/oppo_real": -2.8125662803649902, + "logits/real": -1.8576146364212036, + "logps/generated": -872.798828125, + "logps/oppo_gen": -100.13014221191406, + "logps/oppo_real": -193.47467041015625, + "logps/real": -196.80953979492188, + "loss": 0.2903, + "loss/gen": 0.019167516380548477, + "loss/real": 0.2566121816635132, + "rewards/accuracies": 1.0, + "rewards/generated": -772.6685791015625, + "rewards/margins": 769.333740234375, + "rewards/real": -3.3348846435546875, + "step": 362 + }, + { + "epoch": 0.76, + "grad_norm": 61.85891126057547, + "learning_rate": 8.317029719444016e-08, + "logits/generated": 0.3024826645851135, + "logits/oppo_generated": -2.765684127807617, + "logits/oppo_real": -2.8010177612304688, + "logits/real": -2.0826213359832764, + "logps/generated": -720.9824829101562, + "logps/oppo_gen": -92.44059753417969, + "logps/oppo_real": -245.97979736328125, + "logps/real": -244.0174560546875, + "loss": 0.3713, + "loss/gen": 0.07919669896364212, + "loss/real": 0.22272387146949768, + "rewards/accuracies": 1.0, + "rewards/generated": -628.5418701171875, + "rewards/margins": 630.5042724609375, + "rewards/real": 1.962360143661499, + "step": 363 + }, + { + "epoch": 0.76, + "grad_norm": 39.76462271575782, + "learning_rate": 8.181442992915e-08, + "logits/generated": 0.19746087491512299, + "logits/oppo_generated": -2.796973943710327, + "logits/oppo_real": -2.726362705230713, + "logits/real": -1.9224494695663452, + "logps/generated": -806.6083984375, + "logps/oppo_gen": -80.83071899414062, + "logps/oppo_real": -287.78509521484375, + "logps/real": -312.4919738769531, + "loss": 0.315, + "loss/gen": 0.024429194629192352, + "loss/real": 0.4663940668106079, + "rewards/accuracies": 0.9375, + "rewards/generated": -725.7777099609375, + "rewards/margins": 701.0706787109375, + "rewards/real": -24.706878662109375, + "step": 364 + }, + { + "epoch": 0.76, + "grad_norm": 47.83683510610573, + "learning_rate": 8.046754004438428e-08, + "logits/generated": 0.40333959460258484, + "logits/oppo_generated": -2.7642383575439453, + "logits/oppo_real": -2.565757989883423, + "logits/real": -2.1065196990966797, + "logps/generated": -664.4947509765625, + "logps/oppo_gen": -78.50663757324219, + "logps/oppo_real": -220.23593139648438, + "logps/real": -186.36785888671875, + "loss": 0.2479, + "loss/gen": 0.07458934187889099, + "loss/real": 0.15034610033035278, + "rewards/accuracies": 1.0, + "rewards/generated": -585.9880981445312, + "rewards/margins": 619.856201171875, + "rewards/real": 33.868080139160156, + "step": 365 + }, + { + "epoch": 0.77, + "grad_norm": 36.08634934951165, + "learning_rate": 7.912969943420017e-08, + "logits/generated": -0.5886929631233215, + "logits/oppo_generated": -2.8737826347351074, + "logits/oppo_real": -2.7705636024475098, + "logits/real": -1.9518818855285645, + "logps/generated": -682.5374755859375, + "logps/oppo_gen": -81.06214904785156, + "logps/oppo_real": -277.17974853515625, + "logps/real": -259.0784912109375, + "loss": 0.2526, + "loss/gen": 0.13476580381393433, + "loss/real": 0.1966303288936615, + "rewards/accuracies": 1.0, + "rewards/generated": -601.475341796875, + "rewards/margins": 619.57666015625, + "rewards/real": 18.10125732421875, + "step": 366 + }, + { + "epoch": 0.77, + "grad_norm": 52.871022031951725, + "learning_rate": 7.780097950962447e-08, + "logits/generated": 0.373833030462265, + "logits/oppo_generated": -2.6847777366638184, + "logits/oppo_real": -2.5956692695617676, + "logits/real": -2.0350966453552246, + "logps/generated": -751.0484619140625, + "logps/oppo_gen": -76.97508239746094, + "logps/oppo_real": -226.62579345703125, + "logps/real": -217.1838836669922, + "loss": 0.3235, + "loss/gen": 0.02066943421959877, + "loss/real": 0.22452738881111145, + "rewards/accuracies": 1.0, + "rewards/generated": -674.0733642578125, + "rewards/margins": 683.515380859375, + "rewards/real": 9.441933631896973, + "step": 367 + }, + { + "epoch": 0.77, + "grad_norm": 20.391625484315323, + "learning_rate": 7.648145119484151e-08, + "logits/generated": 0.03344006836414337, + "logits/oppo_generated": -2.886763095855713, + "logits/oppo_real": -2.899338960647583, + "logits/real": -2.359893560409546, + "logps/generated": -1293.310546875, + "logps/oppo_gen": -96.46270751953125, + "logps/oppo_real": -380.9791259765625, + "logps/real": -356.67138671875, + "loss": 0.2828, + "loss/gen": 0.015580754727125168, + "loss/real": 0.1647091805934906, + "rewards/accuracies": 1.0, + "rewards/generated": -1196.847900390625, + "rewards/margins": 1221.1556396484375, + "rewards/real": 24.307750701904297, + "step": 368 + }, + { + "epoch": 0.77, + "grad_norm": 32.37493967175846, + "learning_rate": 7.517118492340748e-08, + "logits/generated": 1.0505952835083008, + "logits/oppo_generated": -2.832742691040039, + "logits/oppo_real": -2.962982654571533, + "logits/real": -1.8713144063949585, + "logps/generated": -675.9676513671875, + "logps/oppo_gen": -68.11212158203125, + "logps/oppo_real": -284.201904296875, + "logps/real": -310.61572265625, + "loss": 0.2927, + "loss/gen": 0.026173148304224014, + "loss/real": 0.45966047048568726, + "rewards/accuracies": 1.0, + "rewards/generated": -607.85546875, + "rewards/margins": 581.4417114257812, + "rewards/real": -26.413818359375, + "step": 369 + }, + { + "epoch": 0.77, + "grad_norm": 17.18461090775061, + "learning_rate": 7.387025063449081e-08, + "logits/generated": 1.1518608331680298, + "logits/oppo_generated": -2.6026477813720703, + "logits/oppo_real": -2.6428492069244385, + "logits/real": -1.8430811166763306, + "logps/generated": -653.1893310546875, + "logps/oppo_gen": -60.00908660888672, + "logps/oppo_real": -280.5744323730469, + "logps/real": -255.991455078125, + "loss": 0.2254, + "loss/gen": 0.09616182744503021, + "loss/real": 0.16350211203098297, + "rewards/accuracies": 1.0, + "rewards/generated": -593.1802978515625, + "rewards/margins": 617.7632446289062, + "rewards/real": 24.582962036132812, + "step": 370 + }, + { + "epoch": 0.78, + "grad_norm": 22.43516388104042, + "learning_rate": 7.257871776913879e-08, + "logits/generated": -0.040337368845939636, + "logits/oppo_generated": -2.7108025550842285, + "logits/oppo_real": -2.8590545654296875, + "logits/real": -2.0019431114196777, + "logps/generated": -726.527099609375, + "logps/oppo_gen": -90.550537109375, + "logps/oppo_real": -352.6172790527344, + "logps/real": -318.4084167480469, + "loss": 0.2788, + "loss/gen": 0.053786490112543106, + "loss/real": 0.14998918771743774, + "rewards/accuracies": 1.0, + "rewards/generated": -635.9765625, + "rewards/margins": 670.1854248046875, + "rewards/real": 34.2088623046875, + "step": 371 + }, + { + "epoch": 0.78, + "grad_norm": 37.02779519749319, + "learning_rate": 7.129665526657145e-08, + "logits/generated": 0.5808796882629395, + "logits/oppo_generated": -2.8980014324188232, + "logits/oppo_real": -2.8099284172058105, + "logits/real": -2.2642743587493896, + "logps/generated": -742.5794067382812, + "logps/oppo_gen": -78.37774658203125, + "logps/oppo_real": -325.51953125, + "logps/real": -323.89434814453125, + "loss": 0.2738, + "loss/gen": 0.04419238120317459, + "loss/real": 0.2435626983642578, + "rewards/accuracies": 1.0, + "rewards/generated": -664.20166015625, + "rewards/margins": 665.826904296875, + "rewards/real": 1.6252069473266602, + "step": 372 + }, + { + "epoch": 0.78, + "grad_norm": 22.601374140461225, + "learning_rate": 7.002413156050108e-08, + "logits/generated": 0.8091610670089722, + "logits/oppo_generated": -2.7554728984832764, + "logits/oppo_real": -2.894706964492798, + "logits/real": -2.002586841583252, + "logps/generated": -853.19775390625, + "logps/oppo_gen": -74.49253845214844, + "logps/oppo_real": -317.25726318359375, + "logps/real": -293.00537109375, + "loss": 0.3157, + "loss/gen": 0.011781592853367329, + "loss/real": 0.16653022170066833, + "rewards/accuracies": 1.0, + "rewards/generated": -778.7052612304688, + "rewards/margins": 802.9571533203125, + "rewards/real": 24.25188636779785, + "step": 373 + }, + { + "epoch": 0.78, + "grad_norm": 45.91711654488852, + "learning_rate": 6.876121457547995e-08, + "logits/generated": 0.8897229433059692, + "logits/oppo_generated": -2.9072961807250977, + "logits/oppo_real": -2.758073329925537, + "logits/real": -2.256460189819336, + "logps/generated": -862.1790161132812, + "logps/oppo_gen": -78.95392608642578, + "logps/oppo_real": -264.9425964355469, + "logps/real": -287.25244140625, + "loss": 0.2942, + "loss/gen": 0.04109443724155426, + "loss/real": 0.34802842140197754, + "rewards/accuracies": 1.0, + "rewards/generated": -783.22509765625, + "rewards/margins": 760.915283203125, + "rewards/real": -22.30983543395996, + "step": 374 + }, + { + "epoch": 0.78, + "grad_norm": 44.29882601523572, + "learning_rate": 6.75079717232744e-08, + "logits/generated": -0.22042664885520935, + "logits/oppo_generated": -2.6750731468200684, + "logits/oppo_real": -2.583487033843994, + "logits/real": -1.7571690082550049, + "logps/generated": -773.0181884765625, + "logps/oppo_gen": -91.54539489746094, + "logps/oppo_real": -271.75091552734375, + "logps/real": -281.9573974609375, + "loss": 0.2854, + "loss/gen": 0.1139841377735138, + "loss/real": 0.32424396276474, + "rewards/accuracies": 0.9375, + "rewards/generated": -681.4728393554688, + "rewards/margins": 671.2662963867188, + "rewards/real": -10.206502914428711, + "step": 375 + }, + { + "epoch": 0.79, + "grad_norm": 40.28310459268253, + "learning_rate": 6.626446989926652e-08, + "logits/generated": 0.7616984844207764, + "logits/oppo_generated": -2.8004720211029053, + "logits/oppo_real": -2.6996493339538574, + "logits/real": -1.916635513305664, + "logps/generated": -710.7991943359375, + "logps/oppo_gen": -80.80632019042969, + "logps/oppo_real": -282.40545654296875, + "logps/real": -315.500244140625, + "loss": 0.3345, + "loss/gen": 0.052083853632211685, + "loss/real": 0.46380823850631714, + "rewards/accuracies": 1.0, + "rewards/generated": -629.992919921875, + "rewards/margins": 596.8980712890625, + "rewards/real": -33.09479522705078, + "step": 376 + }, + { + "epoch": 0.79, + "grad_norm": 38.0174151217447, + "learning_rate": 6.503077547888352e-08, + "logits/generated": 0.3516882359981537, + "logits/oppo_generated": -2.7037720680236816, + "logits/oppo_real": -2.8666648864746094, + "logits/real": -1.8362528085708618, + "logps/generated": -685.1957397460938, + "logps/oppo_gen": -63.57231140136719, + "logps/oppo_real": -290.30877685546875, + "logps/real": -291.5946044921875, + "loss": 0.3308, + "loss/gen": 0.07059072703123093, + "loss/real": 0.27046704292297363, + "rewards/accuracies": 1.0, + "rewards/generated": -621.6234130859375, + "rewards/margins": 620.337646484375, + "rewards/real": -1.2858242988586426, + "step": 377 + }, + { + "epoch": 0.79, + "grad_norm": 24.148170847363108, + "learning_rate": 6.380695431405453e-08, + "logits/generated": 0.8600256443023682, + "logits/oppo_generated": -2.863770008087158, + "logits/oppo_real": -2.788264751434326, + "logits/real": -2.4320356845855713, + "logps/generated": -767.5900268554688, + "logps/oppo_gen": -78.98788452148438, + "logps/oppo_real": -346.01312255859375, + "logps/real": -334.597412109375, + "loss": 0.2895, + "loss/gen": 0.0075337570160627365, + "loss/real": 0.24575293064117432, + "rewards/accuracies": 1.0, + "rewards/generated": -688.6021728515625, + "rewards/margins": 700.0179443359375, + "rewards/real": 11.415748596191406, + "step": 378 + }, + { + "epoch": 0.79, + "grad_norm": 19.998951693717753, + "learning_rate": 6.259307172969606e-08, + "logits/generated": 0.8398880958557129, + "logits/oppo_generated": -2.7056989669799805, + "logits/oppo_real": -2.744664192199707, + "logits/real": -1.7210136651992798, + "logps/generated": -686.8607177734375, + "logps/oppo_gen": -70.72666931152344, + "logps/oppo_real": -221.324951171875, + "logps/real": -223.03982543945312, + "loss": 0.2638, + "loss/gen": 0.017973629757761955, + "loss/real": 0.3097477853298187, + "rewards/accuracies": 1.0, + "rewards/generated": -616.1341552734375, + "rewards/margins": 614.4192504882812, + "rewards/real": -1.7148818969726562, + "step": 379 + }, + { + "epoch": 0.79, + "grad_norm": 29.975495566756795, + "learning_rate": 6.138919252022435e-08, + "logits/generated": -0.37420105934143066, + "logits/oppo_generated": -2.858649492263794, + "logits/oppo_real": -2.756929874420166, + "logits/real": -2.3294174671173096, + "logps/generated": -824.7210693359375, + "logps/oppo_gen": -85.56549072265625, + "logps/oppo_real": -359.80169677734375, + "logps/real": -327.5230712890625, + "loss": 0.2376, + "loss/gen": 0.030374273657798767, + "loss/real": 0.15413853526115417, + "rewards/accuracies": 1.0, + "rewards/generated": -739.1556396484375, + "rewards/margins": 771.4342041015625, + "rewards/real": 32.27858352661133, + "step": 380 + }, + { + "epoch": 0.8, + "grad_norm": 28.702696521366356, + "learning_rate": 6.019538094609759e-08, + "logits/generated": 0.43896496295928955, + "logits/oppo_generated": -2.8645360469818115, + "logits/oppo_real": -2.8193347454071045, + "logits/real": -2.3504862785339355, + "logps/generated": -752.4977416992188, + "logps/oppo_gen": -87.71719360351562, + "logps/oppo_real": -305.0334167480469, + "logps/real": -288.9637756347656, + "loss": 0.2437, + "loss/gen": 0.06234963238239288, + "loss/real": 0.18159055709838867, + "rewards/accuracies": 1.0, + "rewards/generated": -664.780517578125, + "rewards/margins": 680.8502197265625, + "rewards/real": 16.069637298583984, + "step": 381 + }, + { + "epoch": 0.8, + "grad_norm": 41.82788861065067, + "learning_rate": 5.9011700730385224e-08, + "logits/generated": 0.6830179691314697, + "logits/oppo_generated": -2.926161766052246, + "logits/oppo_real": -2.8411059379577637, + "logits/real": -2.332524299621582, + "logps/generated": -651.4114990234375, + "logps/oppo_gen": -72.51399993896484, + "logps/oppo_real": -324.98846435546875, + "logps/real": -315.5436706542969, + "loss": 0.3115, + "loss/gen": 0.13546037673950195, + "loss/real": 0.22851350903511047, + "rewards/accuracies": 1.0, + "rewards/generated": -578.8975830078125, + "rewards/margins": 588.3423461914062, + "rewards/real": 9.444814682006836, + "step": 382 + }, + { + "epoch": 0.8, + "grad_norm": 23.53387866086291, + "learning_rate": 5.7838215055366954e-08, + "logits/generated": 0.6993808746337891, + "logits/oppo_generated": -2.8700051307678223, + "logits/oppo_real": -2.7701120376586914, + "logits/real": -2.3738186359405518, + "logps/generated": -675.7946166992188, + "logps/oppo_gen": -63.61039352416992, + "logps/oppo_real": -258.7322998046875, + "logps/real": -240.4178466796875, + "loss": 0.2715, + "loss/gen": 0.0490497350692749, + "loss/real": 0.17794281244277954, + "rewards/accuracies": 1.0, + "rewards/generated": -612.1842651367188, + "rewards/margins": 630.4986572265625, + "rewards/real": 18.31442642211914, + "step": 383 + }, + { + "epoch": 0.8, + "grad_norm": 27.44267929653838, + "learning_rate": 5.6674986559160004e-08, + "logits/generated": 1.583488941192627, + "logits/oppo_generated": -2.539917469024658, + "logits/oppo_real": -2.5254969596862793, + "logits/real": -1.7350016832351685, + "logps/generated": -687.21630859375, + "logps/oppo_gen": -51.28068923950195, + "logps/oppo_real": -226.71041870117188, + "logps/real": -194.57864379882812, + "loss": 0.2423, + "loss/gen": 0.019552189856767654, + "loss/real": 0.16991135478019714, + "rewards/accuracies": 1.0, + "rewards/generated": -635.935546875, + "rewards/margins": 668.0673828125, + "rewards/real": 32.13178634643555, + "step": 384 + }, + { + "epoch": 0.81, + "grad_norm": 23.579331272986728, + "learning_rate": 5.552207733237543e-08, + "logits/generated": 0.33335748314857483, + "logits/oppo_generated": -2.7529163360595703, + "logits/oppo_real": -2.814352512359619, + "logits/real": -1.8816819190979004, + "logps/generated": -772.8755493164062, + "logps/oppo_gen": -78.71615600585938, + "logps/oppo_real": -282.2195129394531, + "logps/real": -305.6546936035156, + "loss": 0.2747, + "loss/gen": 0.0355767123401165, + "loss/real": 0.39402052760124207, + "rewards/accuracies": 0.9375, + "rewards/generated": -694.159423828125, + "rewards/margins": 670.7242431640625, + "rewards/real": -23.4351806640625, + "step": 385 + }, + { + "epoch": 0.81, + "grad_norm": 25.897959083062293, + "learning_rate": 5.4379548914804427e-08, + "logits/generated": -1.016433835029602, + "logits/oppo_generated": -2.8893675804138184, + "logits/oppo_real": -2.8325114250183105, + "logits/real": -2.454998016357422, + "logps/generated": -944.657470703125, + "logps/oppo_gen": -82.72488403320312, + "logps/oppo_real": -338.88629150390625, + "logps/real": -304.11285400390625, + "loss": 0.3319, + "loss/gen": 0.010652797296643257, + "loss/real": 0.14876721799373627, + "rewards/accuracies": 1.0, + "rewards/generated": -861.9326171875, + "rewards/margins": 896.7060546875, + "rewards/real": 34.7734375, + "step": 386 + }, + { + "epoch": 0.81, + "grad_norm": 32.422641180706, + "learning_rate": 5.324746229213281e-08, + "logits/generated": -0.0454447939991951, + "logits/oppo_generated": -2.660921573638916, + "logits/oppo_real": -2.8100593090057373, + "logits/real": -1.9440929889678955, + "logps/generated": -732.8896484375, + "logps/oppo_gen": -74.4239730834961, + "logps/oppo_real": -280.35174560546875, + "logps/real": -247.58436584472656, + "loss": 0.3088, + "loss/gen": 0.048400383442640305, + "loss/real": 0.1509433090686798, + "rewards/accuracies": 1.0, + "rewards/generated": -658.4656982421875, + "rewards/margins": 691.2330932617188, + "rewards/real": 32.76738739013672, + "step": 387 + }, + { + "epoch": 0.81, + "grad_norm": 33.62113346147591, + "learning_rate": 5.212587789268649e-08, + "logits/generated": 0.09888418763875961, + "logits/oppo_generated": -2.5671732425689697, + "logits/oppo_real": -2.823955535888672, + "logits/real": -1.5941078662872314, + "logps/generated": -636.989013671875, + "logps/oppo_gen": -59.2068977355957, + "logps/oppo_real": -214.0272674560547, + "logps/real": -246.39768981933594, + "loss": 0.3632, + "loss/gen": 0.1291210651397705, + "loss/real": 0.4964044690132141, + "rewards/accuracies": 1.0, + "rewards/generated": -577.7821044921875, + "rewards/margins": 545.41162109375, + "rewards/real": -32.37043380737305, + "step": 388 + }, + { + "epoch": 0.81, + "grad_norm": 34.79504548681734, + "learning_rate": 5.101485558420504e-08, + "logits/generated": -0.21139076352119446, + "logits/oppo_generated": -2.7977442741394043, + "logits/oppo_real": -2.9435877799987793, + "logits/real": -2.2349720001220703, + "logps/generated": -750.2471923828125, + "logps/oppo_gen": -87.367431640625, + "logps/oppo_real": -360.75341796875, + "logps/real": -329.6597900390625, + "loss": 0.241, + "loss/gen": 0.057427436113357544, + "loss/real": 0.15328460931777954, + "rewards/accuracies": 1.0, + "rewards/generated": -662.8797607421875, + "rewards/margins": 693.9732666015625, + "rewards/real": 31.093629837036133, + "step": 389 + }, + { + "epoch": 0.82, + "grad_norm": 30.21505762084941, + "learning_rate": 4.991445467064689e-08, + "logits/generated": -0.5020634531974792, + "logits/oppo_generated": -2.736494302749634, + "logits/oppo_real": -2.5568008422851562, + "logits/real": -2.028607130050659, + "logps/generated": -551.84033203125, + "logps/oppo_gen": -63.113067626953125, + "logps/oppo_real": -307.4358215332031, + "logps/real": -276.339599609375, + "loss": 0.3518, + "loss/gen": 0.1953495442867279, + "loss/real": 0.1559717357158661, + "rewards/accuracies": 1.0, + "rewards/generated": -488.72723388671875, + "rewards/margins": 519.8234252929688, + "rewards/real": 31.096202850341797, + "step": 390 + }, + { + "epoch": 0.82, + "grad_norm": 34.30040556998743, + "learning_rate": 4.882473388902322e-08, + "logits/generated": -0.31282973289489746, + "logits/oppo_generated": -2.565460681915283, + "logits/oppo_real": -2.567373037338257, + "logits/real": -2.015244245529175, + "logps/generated": -674.6446533203125, + "logps/oppo_gen": -81.51359558105469, + "logps/oppo_real": -271.79815673828125, + "logps/real": -240.47508239746094, + "loss": 0.3096, + "loss/gen": 0.14029625058174133, + "loss/real": 0.15811359882354736, + "rewards/accuracies": 1.0, + "rewards/generated": -593.1309814453125, + "rewards/margins": 624.4541015625, + "rewards/real": 31.323078155517578, + "step": 391 + }, + { + "epoch": 0.82, + "grad_norm": 28.188018591058217, + "learning_rate": 4.774575140626316e-08, + "logits/generated": -0.5394298434257507, + "logits/oppo_generated": -2.7671310901641846, + "logits/oppo_real": -2.7871756553649902, + "logits/real": -2.0843000411987305, + "logps/generated": -701.097900390625, + "logps/oppo_gen": -84.26641845703125, + "logps/oppo_real": -395.81341552734375, + "logps/real": -363.90277099609375, + "loss": 0.349, + "loss/gen": 0.09549596160650253, + "loss/real": 0.1539105772972107, + "rewards/accuracies": 1.0, + "rewards/generated": -616.8314819335938, + "rewards/margins": 648.7420654296875, + "rewards/real": 31.910619735717773, + "step": 392 + }, + { + "epoch": 0.82, + "grad_norm": 58.53682026017364, + "learning_rate": 4.667756481610866e-08, + "logits/generated": -1.2799639701843262, + "logits/oppo_generated": -2.7752625942230225, + "logits/oppo_real": -2.965273380279541, + "logits/real": -2.285590887069702, + "logps/generated": -534.36328125, + "logps/oppo_gen": -66.7477035522461, + "logps/oppo_real": -274.6632385253906, + "logps/real": -244.97430419921875, + "loss": 0.3684, + "loss/gen": 0.18041257560253143, + "loss/real": 0.15445731580257416, + "rewards/accuracies": 1.0, + "rewards/generated": -467.61553955078125, + "rewards/margins": 497.30450439453125, + "rewards/real": 29.688926696777344, + "step": 393 + }, + { + "epoch": 0.82, + "grad_norm": 19.140116383548488, + "learning_rate": 4.562023113604041e-08, + "logits/generated": 0.006332114338874817, + "logits/oppo_generated": -2.8669140338897705, + "logits/oppo_real": -2.829169750213623, + "logits/real": -2.427414655685425, + "logps/generated": -813.6133422851562, + "logps/oppo_gen": -90.48013305664062, + "logps/oppo_real": -305.9639587402344, + "logps/real": -272.2070617675781, + "loss": 0.2935, + "loss/gen": 0.015020076185464859, + "loss/real": 0.1492321491241455, + "rewards/accuracies": 1.0, + "rewards/generated": -723.13330078125, + "rewards/margins": 756.89013671875, + "rewards/real": 33.756927490234375, + "step": 394 + }, + { + "epoch": 0.83, + "grad_norm": 23.55447761100517, + "learning_rate": 4.4573806804234335e-08, + "logits/generated": 0.7242439985275269, + "logits/oppo_generated": -2.8029961585998535, + "logits/oppo_real": -2.7388648986816406, + "logits/real": -2.2442853450775146, + "logps/generated": -746.589111328125, + "logps/oppo_gen": -84.95040893554688, + "logps/oppo_real": -303.9382629394531, + "logps/real": -277.8614196777344, + "loss": 0.2665, + "loss/gen": 0.023716842755675316, + "loss/real": 0.16393963992595673, + "rewards/accuracies": 1.0, + "rewards/generated": -661.638671875, + "rewards/margins": 687.7155151367188, + "rewards/real": 26.076847076416016, + "step": 395 + }, + { + "epoch": 0.83, + "grad_norm": 23.563012884006895, + "learning_rate": 4.3538347676548956e-08, + "logits/generated": 0.9573155045509338, + "logits/oppo_generated": -2.836289882659912, + "logits/oppo_real": -2.856013298034668, + "logits/real": -2.266578197479248, + "logps/generated": -776.6182861328125, + "logps/oppo_gen": -75.90487670898438, + "logps/oppo_real": -270.5047607421875, + "logps/real": -238.49551391601562, + "loss": 0.2417, + "loss/gen": 0.020842332392930984, + "loss/real": 0.15183612704277039, + "rewards/accuracies": 1.0, + "rewards/generated": -700.71337890625, + "rewards/margins": 732.72265625, + "rewards/real": 32.009239196777344, + "step": 396 + }, + { + "epoch": 0.83, + "grad_norm": 26.58242129811189, + "learning_rate": 4.251390902354413e-08, + "logits/generated": -0.4084343910217285, + "logits/oppo_generated": -2.8244986534118652, + "logits/oppo_real": -2.845081329345703, + "logits/real": -2.4438085556030273, + "logps/generated": -735.8739013671875, + "logps/oppo_gen": -82.96200561523438, + "logps/oppo_real": -377.468017578125, + "logps/real": -351.4737548828125, + "loss": 0.2848, + "loss/gen": 0.046934593468904495, + "loss/real": 0.16870608925819397, + "rewards/accuracies": 1.0, + "rewards/generated": -652.911865234375, + "rewards/margins": 678.9061279296875, + "rewards/real": 25.994253158569336, + "step": 397 + }, + { + "epoch": 0.83, + "grad_norm": 33.93199447458751, + "learning_rate": 4.1500545527530544e-08, + "logits/generated": 0.4591218829154968, + "logits/oppo_generated": -2.9145877361297607, + "logits/oppo_real": -2.768162727355957, + "logits/real": -2.4178929328918457, + "logps/generated": -820.8162841796875, + "logps/oppo_gen": -72.26435852050781, + "logps/oppo_real": -264.8419494628906, + "logps/real": -247.36727905273438, + "loss": 0.2787, + "loss/gen": 0.03440755978226662, + "loss/real": 0.18177896738052368, + "rewards/accuracies": 1.0, + "rewards/generated": -748.5518798828125, + "rewards/margins": 766.026611328125, + "rewards/real": 17.47467803955078, + "step": 398 + }, + { + "epoch": 0.83, + "grad_norm": 19.506923131258507, + "learning_rate": 4.0498311279651196e-08, + "logits/generated": 0.3644871413707733, + "logits/oppo_generated": -2.825018882751465, + "logits/oppo_real": -2.7147412300109863, + "logits/real": -2.2287793159484863, + "logps/generated": -812.976318359375, + "logps/oppo_gen": -73.07958984375, + "logps/oppo_real": -251.62904357910156, + "logps/real": -227.3709716796875, + "loss": 0.2056, + "loss/gen": 0.03383062779903412, + "loss/real": 0.16399678587913513, + "rewards/accuracies": 1.0, + "rewards/generated": -739.896728515625, + "rewards/margins": 764.15478515625, + "rewards/real": 24.258071899414062, + "step": 399 + }, + { + "epoch": 0.84, + "grad_norm": 27.640475948227287, + "learning_rate": 3.9507259776993954e-08, + "logits/generated": -0.3294936716556549, + "logits/oppo_generated": -2.8925132751464844, + "logits/oppo_real": -3.0092806816101074, + "logits/real": -2.379504919052124, + "logps/generated": -743.8359985351562, + "logps/oppo_gen": -92.540771484375, + "logps/oppo_real": -473.1883239746094, + "logps/real": -436.2169189453125, + "loss": 0.2205, + "loss/gen": 0.045243918895721436, + "loss/real": 0.14624208211898804, + "rewards/accuracies": 1.0, + "rewards/generated": -651.2952880859375, + "rewards/margins": 688.2666015625, + "rewards/real": 36.97139358520508, + "step": 400 + }, + { + "epoch": 0.84, + "grad_norm": 27.54912493470471, + "learning_rate": 3.8527443919736006e-08, + "logits/generated": 0.2186594009399414, + "logits/oppo_generated": -2.6842846870422363, + "logits/oppo_real": -2.769160270690918, + "logits/real": -2.1254193782806396, + "logps/generated": -810.92626953125, + "logps/oppo_gen": -88.25856018066406, + "logps/oppo_real": -359.1764831542969, + "logps/real": -332.98193359375, + "loss": 0.2985, + "loss/gen": 0.015042455866932869, + "loss/real": 0.16066262125968933, + "rewards/accuracies": 1.0, + "rewards/generated": -722.667724609375, + "rewards/margins": 748.8622436523438, + "rewards/real": 26.194541931152344, + "step": 401 + }, + { + "epoch": 0.84, + "grad_norm": 27.868933047377663, + "learning_rate": 3.755891600832026e-08, + "logits/generated": 1.6786067485809326, + "logits/oppo_generated": -2.787109851837158, + "logits/oppo_real": -2.6736297607421875, + "logits/real": -2.085148334503174, + "logps/generated": -823.227783203125, + "logps/oppo_gen": -83.42156982421875, + "logps/oppo_real": -251.31224060058594, + "logps/real": -221.16571044921875, + "loss": 0.2847, + "loss/gen": 0.04803692549467087, + "loss/real": 0.15670424699783325, + "rewards/accuracies": 1.0, + "rewards/generated": -739.8062133789062, + "rewards/margins": 769.9527587890625, + "rewards/real": 30.146543502807617, + "step": 402 + }, + { + "epoch": 0.84, + "grad_norm": 26.888394872297486, + "learning_rate": 3.660172774066339e-08, + "logits/generated": 0.3176983594894409, + "logits/oppo_generated": -2.6812472343444824, + "logits/oppo_real": -2.740905284881592, + "logits/real": -1.7640082836151123, + "logps/generated": -676.04443359375, + "logps/oppo_gen": -76.80146789550781, + "logps/oppo_real": -230.00216674804688, + "logps/real": -231.5615997314453, + "loss": 0.2695, + "loss/gen": 0.1879962980747223, + "loss/real": 0.293995201587677, + "rewards/accuracies": 1.0, + "rewards/generated": -599.2429809570312, + "rewards/margins": 597.6835327148438, + "rewards/real": -1.5594367980957031, + "step": 403 + }, + { + "epoch": 0.85, + "grad_norm": 39.111272676692906, + "learning_rate": 3.565593020939678e-08, + "logits/generated": -0.10082553327083588, + "logits/oppo_generated": -2.7819857597351074, + "logits/oppo_real": -2.6031432151794434, + "logits/real": -2.1621930599212646, + "logps/generated": -761.4735107421875, + "logps/oppo_gen": -71.76890563964844, + "logps/oppo_real": -343.7472229003906, + "logps/real": -331.093994140625, + "loss": 0.3908, + "loss/gen": 0.10773513466119766, + "loss/real": 0.2386000156402588, + "rewards/accuracies": 1.0, + "rewards/generated": -689.70458984375, + "rewards/margins": 702.3577880859375, + "rewards/real": 12.653242111206055, + "step": 404 + }, + { + "epoch": 0.85, + "grad_norm": 24.426767943948295, + "learning_rate": 3.472157389913874e-08, + "logits/generated": -0.336029589176178, + "logits/oppo_generated": -2.587843894958496, + "logits/oppo_real": -2.5900180339813232, + "logits/real": -1.9749070405960083, + "logps/generated": -670.703857421875, + "logps/oppo_gen": -80.07936096191406, + "logps/oppo_real": -271.55816650390625, + "logps/real": -249.90403747558594, + "loss": 0.2879, + "loss/gen": 0.2505200207233429, + "loss/real": 0.16969501972198486, + "rewards/accuracies": 1.0, + "rewards/generated": -590.62451171875, + "rewards/margins": 612.2786254882812, + "rewards/real": 21.654117584228516, + "step": 405 + }, + { + "epoch": 0.85, + "grad_norm": 28.34683887007123, + "learning_rate": 3.3798708683800305e-08, + "logits/generated": -0.4590807259082794, + "logits/oppo_generated": -2.864154815673828, + "logits/oppo_real": -2.9096922874450684, + "logits/real": -2.42179536819458, + "logps/generated": -784.5616455078125, + "logps/oppo_gen": -83.42448425292969, + "logps/oppo_real": -366.8133850097656, + "logps/real": -336.69012451171875, + "loss": 0.3182, + "loss/gen": 0.04298119992017746, + "loss/real": 0.15586364269256592, + "rewards/accuracies": 1.0, + "rewards/generated": -701.13720703125, + "rewards/margins": 731.260498046875, + "rewards/real": 30.12326431274414, + "step": 406 + }, + { + "epoch": 0.85, + "grad_norm": 30.114497083137756, + "learning_rate": 3.288738382392273e-08, + "logits/generated": -0.08086250722408295, + "logits/oppo_generated": -2.6285908222198486, + "logits/oppo_real": -2.733128070831299, + "logits/real": -1.9479438066482544, + "logps/generated": -670.986328125, + "logps/oppo_gen": -78.88270568847656, + "logps/oppo_real": -345.356201171875, + "logps/real": -317.0779113769531, + "loss": 0.2838, + "loss/gen": 0.07308726012706757, + "loss/real": 0.16547003388404846, + "rewards/accuracies": 1.0, + "rewards/generated": -592.1036987304688, + "rewards/margins": 620.3819580078125, + "rewards/real": 28.278263092041016, + "step": 407 + }, + { + "epoch": 0.85, + "grad_norm": 25.61442593379693, + "learning_rate": 3.198764796404807e-08, + "logits/generated": -0.07485093921422958, + "logits/oppo_generated": -2.663846015930176, + "logits/oppo_real": -2.728782892227173, + "logits/real": -1.9464470148086548, + "logps/generated": -773.8787841796875, + "logps/oppo_gen": -71.8175048828125, + "logps/oppo_real": -237.12289428710938, + "logps/real": -206.4139404296875, + "loss": 0.3017, + "loss/gen": 0.023999135941267014, + "loss/real": 0.15480223298072815, + "rewards/accuracies": 1.0, + "rewards/generated": -702.0612182617188, + "rewards/margins": 732.7701416015625, + "rewards/real": 30.708965301513672, + "step": 408 + }, + { + "epoch": 0.86, + "grad_norm": 47.07785533444174, + "learning_rate": 3.109954913012294e-08, + "logits/generated": 1.2388019561767578, + "logits/oppo_generated": -2.6548547744750977, + "logits/oppo_real": -2.708220958709717, + "logits/real": -2.0130181312561035, + "logps/generated": -757.4107666015625, + "logps/oppo_gen": -63.25993728637695, + "logps/oppo_real": -208.60333251953125, + "logps/real": -193.34283447265625, + "loss": 0.2663, + "loss/gen": 0.05390400439500809, + "loss/real": 0.18397364020347595, + "rewards/accuracies": 1.0, + "rewards/generated": -694.1507568359375, + "rewards/margins": 709.4112548828125, + "rewards/real": 15.260494232177734, + "step": 409 + }, + { + "epoch": 0.86, + "grad_norm": 38.33413279708682, + "learning_rate": 3.022313472693447e-08, + "logits/generated": 0.11504703760147095, + "logits/oppo_generated": -2.7640318870544434, + "logits/oppo_real": -2.821716070175171, + "logits/real": -1.8772614002227783, + "logps/generated": -611.8921508789062, + "logps/oppo_gen": -65.58993530273438, + "logps/oppo_real": -218.73495483398438, + "logps/real": -219.8480987548828, + "loss": 0.3002, + "loss/gen": 0.06124342978000641, + "loss/real": 0.3138381838798523, + "rewards/accuracies": 1.0, + "rewards/generated": -546.30224609375, + "rewards/margins": 545.1890869140625, + "rewards/real": -1.113149642944336, + "step": 410 + }, + { + "epoch": 0.86, + "grad_norm": 28.46425917344361, + "learning_rate": 2.935845153558053e-08, + "logits/generated": 0.9498586058616638, + "logits/oppo_generated": -2.7485408782958984, + "logits/oppo_real": -2.6167445182800293, + "logits/real": -1.8449242115020752, + "logps/generated": -763.2135620117188, + "logps/oppo_gen": -85.22071838378906, + "logps/oppo_real": -229.88229370117188, + "logps/real": -216.39529418945312, + "loss": 0.2803, + "loss/gen": 0.04368508979678154, + "loss/real": 0.18471089005470276, + "rewards/accuracies": 1.0, + "rewards/generated": -677.9928588867188, + "rewards/margins": 691.4798583984375, + "rewards/real": 13.486985206604004, + "step": 411 + }, + { + "epoch": 0.86, + "grad_norm": 22.72136923948879, + "learning_rate": 2.8505545710972107e-08, + "logits/generated": 0.07348564267158508, + "logits/oppo_generated": -2.971525192260742, + "logits/oppo_real": -2.9833126068115234, + "logits/real": -2.593665361404419, + "logps/generated": -748.5177612304688, + "logps/oppo_gen": -84.10810852050781, + "logps/oppo_real": -449.6239929199219, + "logps/real": -409.41595458984375, + "loss": 0.2278, + "loss/gen": 0.010234430432319641, + "loss/real": 0.1431514173746109, + "rewards/accuracies": 1.0, + "rewards/generated": -664.40966796875, + "rewards/margins": 704.61767578125, + "rewards/real": 40.208045959472656, + "step": 412 + }, + { + "epoch": 0.86, + "grad_norm": 32.74029257371565, + "learning_rate": 2.766446277937029e-08, + "logits/generated": -0.634722113609314, + "logits/oppo_generated": -2.7652645111083984, + "logits/oppo_real": -3.0290045738220215, + "logits/real": -2.1673192977905273, + "logps/generated": -838.4489135742188, + "logps/oppo_gen": -95.71146392822266, + "logps/oppo_real": -355.184326171875, + "logps/real": -333.04266357421875, + "loss": 0.2874, + "loss/gen": 0.015061789192259312, + "loss/real": 0.17296290397644043, + "rewards/accuracies": 1.0, + "rewards/generated": -742.7374267578125, + "rewards/margins": 764.879150390625, + "rewards/real": 22.141700744628906, + "step": 413 + }, + { + "epoch": 0.87, + "grad_norm": 27.035180801625177, + "learning_rate": 2.683524763595546e-08, + "logits/generated": 0.41425225138664246, + "logits/oppo_generated": -2.7904341220855713, + "logits/oppo_real": -2.7785701751708984, + "logits/real": -1.5771113634109497, + "logps/generated": -848.3800659179688, + "logps/oppo_gen": -72.34674072265625, + "logps/oppo_real": -232.07763671875, + "logps/real": -246.59060668945312, + "loss": 0.3127, + "loss/gen": 0.1244996190071106, + "loss/real": 0.3740527629852295, + "rewards/accuracies": 0.9375, + "rewards/generated": -776.0333251953125, + "rewards/margins": 761.5203857421875, + "rewards/real": -14.51296615600586, + "step": 414 + }, + { + "epoch": 0.87, + "grad_norm": 40.5459146080866, + "learning_rate": 2.601794454243139e-08, + "logits/generated": -0.259597510099411, + "logits/oppo_generated": -2.692986011505127, + "logits/oppo_real": -2.932211399078369, + "logits/real": -2.1438608169555664, + "logps/generated": -699.6242065429688, + "logps/oppo_gen": -74.73764038085938, + "logps/oppo_real": -333.96795654296875, + "logps/real": -311.9378662109375, + "loss": 0.296, + "loss/gen": 0.0382215790450573, + "loss/real": 0.1780180037021637, + "rewards/accuracies": 1.0, + "rewards/generated": -624.8865356445312, + "rewards/margins": 646.9166259765625, + "rewards/real": 22.03009796142578, + "step": 415 + }, + { + "epoch": 0.87, + "grad_norm": 23.11180604395254, + "learning_rate": 2.521259712466256e-08, + "logits/generated": 0.33909279108047485, + "logits/oppo_generated": -2.8526077270507812, + "logits/oppo_real": -2.811613082885742, + "logits/real": -2.142138957977295, + "logps/generated": -759.60107421875, + "logps/oppo_gen": -85.68292236328125, + "logps/oppo_real": -291.8996276855469, + "logps/real": -274.5881652832031, + "loss": 0.281, + "loss/gen": 0.12079505622386932, + "loss/real": 0.1788100302219391, + "rewards/accuracies": 1.0, + "rewards/generated": -673.9180908203125, + "rewards/margins": 691.2295532226562, + "rewards/real": 17.31146240234375, + "step": 416 + }, + { + "epoch": 0.87, + "grad_norm": 47.16826491237249, + "learning_rate": 2.4419248370345285e-08, + "logits/generated": 0.8615379333496094, + "logits/oppo_generated": -2.636507034301758, + "logits/oppo_real": -2.647146701812744, + "logits/real": -1.938757300376892, + "logps/generated": -693.5307006835938, + "logps/oppo_gen": -58.54317092895508, + "logps/oppo_real": -258.7288818359375, + "logps/real": -235.12351989746094, + "loss": 0.3143, + "loss/gen": 0.04418793320655823, + "loss/real": 0.16570395231246948, + "rewards/accuracies": 1.0, + "rewards/generated": -634.987548828125, + "rewards/margins": 658.5928955078125, + "rewards/real": 23.605348587036133, + "step": 417 + }, + { + "epoch": 0.87, + "grad_norm": 33.410398991065456, + "learning_rate": 2.3637940626713342e-08, + "logits/generated": -0.08828268945217133, + "logits/oppo_generated": -2.8972327709198, + "logits/oppo_real": -2.817399024963379, + "logits/real": -2.1558895111083984, + "logps/generated": -801.6412963867188, + "logps/oppo_gen": -77.41506958007812, + "logps/oppo_real": -216.58090209960938, + "logps/real": -202.15652465820312, + "loss": 0.2671, + "loss/gen": 0.014140581712126732, + "loss/real": 0.18970459699630737, + "rewards/accuracies": 1.0, + "rewards/generated": -724.2261962890625, + "rewards/margins": 738.650634765625, + "rewards/real": 14.424371719360352, + "step": 418 + }, + { + "epoch": 0.88, + "grad_norm": 26.887554979822333, + "learning_rate": 2.2868715598277578e-08, + "logits/generated": 0.38027942180633545, + "logits/oppo_generated": -2.589816093444824, + "logits/oppo_real": -2.513615608215332, + "logits/real": -1.8797662258148193, + "logps/generated": -772.2117309570312, + "logps/oppo_gen": -65.45459747314453, + "logps/oppo_real": -176.86581420898438, + "logps/real": -155.86514282226562, + "loss": 0.2818, + "loss/gen": 0.017079707235097885, + "loss/real": 0.18445941805839539, + "rewards/accuracies": 1.0, + "rewards/generated": -706.757080078125, + "rewards/margins": 727.7578125, + "rewards/real": 21.00067901611328, + "step": 419 + }, + { + "epoch": 0.88, + "grad_norm": 30.502731451077665, + "learning_rate": 2.2111614344599684e-08, + "logits/generated": 0.21723803877830505, + "logits/oppo_generated": -2.741001605987549, + "logits/oppo_real": -2.9104204177856445, + "logits/real": -2.0397191047668457, + "logps/generated": -762.03857421875, + "logps/oppo_gen": -73.04945373535156, + "logps/oppo_real": -313.0050048828125, + "logps/real": -289.78375244140625, + "loss": 0.2727, + "loss/gen": 0.055183976888656616, + "loss/real": 0.16841815412044525, + "rewards/accuracies": 1.0, + "rewards/generated": -688.9891357421875, + "rewards/margins": 712.21044921875, + "rewards/real": 23.221250534057617, + "step": 420 + }, + { + "epoch": 0.88, + "grad_norm": 52.73633561170135, + "learning_rate": 2.1366677278100486e-08, + "logits/generated": 1.2762212753295898, + "logits/oppo_generated": -2.7252440452575684, + "logits/oppo_real": -2.489119052886963, + "logits/real": -1.8293414115905762, + "logps/generated": -715.15185546875, + "logps/oppo_gen": -75.96475982666016, + "logps/oppo_real": -274.5854797363281, + "logps/real": -288.38543701171875, + "loss": 0.3453, + "loss/gen": 0.0465887188911438, + "loss/real": 0.39446061849594116, + "rewards/accuracies": 1.0, + "rewards/generated": -639.1871337890625, + "rewards/margins": 625.38720703125, + "rewards/real": -13.799976348876953, + "step": 421 + }, + { + "epoch": 0.88, + "grad_norm": 31.28419125626542, + "learning_rate": 2.0633944161903145e-08, + "logits/generated": 0.22932855784893036, + "logits/oppo_generated": -2.8465185165405273, + "logits/oppo_real": -2.874579668045044, + "logits/real": -2.474705457687378, + "logps/generated": -794.6787719726562, + "logps/oppo_gen": -77.42123413085938, + "logps/oppo_real": -269.32452392578125, + "logps/real": -232.96438598632812, + "loss": 0.2468, + "loss/gen": 0.01573217660188675, + "loss/real": 0.14619815349578857, + "rewards/accuracies": 1.0, + "rewards/generated": -717.257568359375, + "rewards/margins": 753.61767578125, + "rewards/real": 36.360107421875, + "step": 422 + }, + { + "epoch": 0.88, + "grad_norm": 35.38810007549324, + "learning_rate": 1.991345410771017e-08, + "logits/generated": 0.016978725790977478, + "logits/oppo_generated": -2.8310189247131348, + "logits/oppo_real": -2.79268741607666, + "logits/real": -2.273961067199707, + "logps/generated": -612.0408325195312, + "logps/oppo_gen": -61.178497314453125, + "logps/oppo_real": -251.74114990234375, + "logps/real": -251.0238800048828, + "loss": 0.2934, + "loss/gen": 0.06265068799257278, + "loss/real": 0.2755354940891266, + "rewards/accuracies": 1.0, + "rewards/generated": -550.8623657226562, + "rewards/margins": 551.57958984375, + "rewards/real": 0.7172629833221436, + "step": 423 + }, + { + "epoch": 0.89, + "grad_norm": 32.6389252040652, + "learning_rate": 1.9205245573716195e-08, + "logits/generated": 0.4440228343009949, + "logits/oppo_generated": -2.7672994136810303, + "logits/oppo_real": -2.967806577682495, + "logits/real": -2.1830549240112305, + "logps/generated": -733.4920654296875, + "logps/oppo_gen": -83.01123046875, + "logps/oppo_real": -284.7894287109375, + "logps/real": -254.63729858398438, + "loss": 0.2998, + "loss/gen": 0.04325643181800842, + "loss/real": 0.16018053889274597, + "rewards/accuracies": 1.0, + "rewards/generated": -650.4808349609375, + "rewards/margins": 680.6329956054688, + "rewards/real": 30.152151107788086, + "step": 424 + }, + { + "epoch": 0.89, + "grad_norm": 22.53736724560527, + "learning_rate": 1.850935636255496e-08, + "logits/generated": 0.13050776720046997, + "logits/oppo_generated": -2.674236297607422, + "logits/oppo_real": -2.7958316802978516, + "logits/real": -2.067239761352539, + "logps/generated": -669.11083984375, + "logps/oppo_gen": -70.25758361816406, + "logps/oppo_real": -324.11871337890625, + "logps/real": -289.18328857421875, + "loss": 0.276, + "loss/gen": 0.06311353296041489, + "loss/real": 0.14721855521202087, + "rewards/accuracies": 1.0, + "rewards/generated": -598.853271484375, + "rewards/margins": 633.7886962890625, + "rewards/real": 34.93544006347656, + "step": 425 + }, + { + "epoch": 0.89, + "grad_norm": 37.272236376318865, + "learning_rate": 1.7825823619281448e-08, + "logits/generated": -0.16331154108047485, + "logits/oppo_generated": -2.7839677333831787, + "logits/oppo_real": -2.795403480529785, + "logits/real": -1.8390673398971558, + "logps/generated": -816.87646484375, + "logps/oppo_gen": -85.01667785644531, + "logps/oppo_real": -272.42120361328125, + "logps/real": -262.7786560058594, + "loss": 0.3006, + "loss/gen": 0.015330037102103233, + "loss/real": 0.2208605408668518, + "rewards/accuracies": 1.0, + "rewards/generated": -731.8598022460938, + "rewards/margins": 741.5023193359375, + "rewards/real": 9.64252758026123, + "step": 426 + }, + { + "epoch": 0.89, + "grad_norm": 27.185687125262373, + "learning_rate": 1.7154683829389283e-08, + "logits/generated": 0.024800747632980347, + "logits/oppo_generated": -2.7494869232177734, + "logits/oppo_real": -2.8051180839538574, + "logits/real": -2.1958751678466797, + "logps/generated": -673.5935668945312, + "logps/oppo_gen": -77.26094055175781, + "logps/oppo_real": -330.8422546386719, + "logps/real": -315.17755126953125, + "loss": 0.2735, + "loss/gen": 0.0413350835442543, + "loss/real": 0.23769381642341614, + "rewards/accuracies": 1.0, + "rewards/generated": -596.3326416015625, + "rewards/margins": 611.997314453125, + "rewards/real": 15.664695739746094, + "step": 427 + }, + { + "epoch": 0.9, + "grad_norm": 25.722542057928003, + "learning_rate": 1.649597281686302e-08, + "logits/generated": 1.0848944187164307, + "logits/oppo_generated": -2.645679473876953, + "logits/oppo_real": -2.5204734802246094, + "logits/real": -1.728602409362793, + "logps/generated": -722.6002197265625, + "logps/oppo_gen": -55.84619903564453, + "logps/oppo_real": -248.24684143066406, + "logps/real": -255.62530517578125, + "loss": 0.2611, + "loss/gen": 0.11462613195180893, + "loss/real": 0.43046581745147705, + "rewards/accuracies": 1.0, + "rewards/generated": -666.7540283203125, + "rewards/margins": 659.3756103515625, + "rewards/real": -7.378480911254883, + "step": 428 + }, + { + "epoch": 0.9, + "grad_norm": 35.29333343233004, + "learning_rate": 1.584972574226623e-08, + "logits/generated": -0.342296838760376, + "logits/oppo_generated": -2.5918760299682617, + "logits/oppo_real": -2.8728556632995605, + "logits/real": -1.588826298713684, + "logps/generated": -894.3289794921875, + "logps/oppo_gen": -103.07553100585938, + "logps/oppo_real": -180.5751190185547, + "logps/real": -166.38107299804688, + "loss": 0.334, + "loss/gen": 0.013701886869966984, + "loss/real": 0.25664207339286804, + "rewards/accuracies": 1.0, + "rewards/generated": -791.25341796875, + "rewards/margins": 805.4474487304688, + "rewards/real": 14.194051742553711, + "step": 429 + }, + { + "epoch": 0.9, + "grad_norm": 28.738920249539213, + "learning_rate": 1.521597710086439e-08, + "logits/generated": -0.3944983184337616, + "logits/oppo_generated": -2.97127366065979, + "logits/oppo_real": -2.8858768939971924, + "logits/real": -2.4181745052337646, + "logps/generated": -694.18017578125, + "logps/oppo_gen": -90.01107788085938, + "logps/oppo_real": -335.91571044921875, + "logps/real": -312.7785339355469, + "loss": 0.2704, + "loss/gen": 0.10327526926994324, + "loss/real": 0.1773584485054016, + "rewards/accuracies": 1.0, + "rewards/generated": -604.1690673828125, + "rewards/margins": 627.3062133789062, + "rewards/real": 23.137134552001953, + "step": 430 + }, + { + "epoch": 0.9, + "grad_norm": 35.86587461521136, + "learning_rate": 1.459476072078386e-08, + "logits/generated": 0.22406895458698273, + "logits/oppo_generated": -2.885314464569092, + "logits/oppo_real": -2.864060163497925, + "logits/real": -2.2564728260040283, + "logps/generated": -758.191650390625, + "logps/oppo_gen": -88.2413558959961, + "logps/oppo_real": -357.7693176269531, + "logps/real": -357.68109130859375, + "loss": 0.3381, + "loss/gen": 0.01729278452694416, + "loss/real": 0.27993616461753845, + "rewards/accuracies": 1.0, + "rewards/generated": -669.9502563476562, + "rewards/margins": 670.0384521484375, + "rewards/real": 0.08818435668945312, + "step": 431 + }, + { + "epoch": 0.9, + "grad_norm": 33.148304007803326, + "learning_rate": 1.3986109761206093e-08, + "logits/generated": -0.19652684032917023, + "logits/oppo_generated": -2.7991626262664795, + "logits/oppo_real": -2.64943265914917, + "logits/real": -2.2657108306884766, + "logps/generated": -740.3734741210938, + "logps/oppo_gen": -112.3591079711914, + "logps/oppo_real": -486.785400390625, + "logps/real": -473.11456298828125, + "loss": 0.3165, + "loss/gen": 0.1387334167957306, + "loss/real": 0.19511449337005615, + "rewards/accuracies": 0.9375, + "rewards/generated": -628.0143432617188, + "rewards/margins": 641.6851806640625, + "rewards/real": 13.670815467834473, + "step": 432 + }, + { + "epoch": 0.91, + "grad_norm": 28.601751356587634, + "learning_rate": 1.3390056710597647e-08, + "logits/generated": -0.7066829204559326, + "logits/oppo_generated": -2.7696914672851562, + "logits/oppo_real": -2.9772748947143555, + "logits/real": -2.063112735748291, + "logps/generated": -639.30126953125, + "logps/oppo_gen": -70.7381591796875, + "logps/oppo_real": -258.11444091796875, + "logps/real": -261.44818115234375, + "loss": 0.3163, + "loss/gen": 0.1252206563949585, + "loss/real": 0.26253828406333923, + "rewards/accuracies": 0.9375, + "rewards/generated": -568.5631103515625, + "rewards/margins": 565.2293701171875, + "rewards/real": -3.333735227584839, + "step": 433 + }, + { + "epoch": 0.91, + "grad_norm": 30.687004753513946, + "learning_rate": 1.280663338497609e-08, + "logits/generated": -0.32708024978637695, + "logits/oppo_generated": -2.801250457763672, + "logits/oppo_real": -2.8633522987365723, + "logits/real": -2.3736109733581543, + "logps/generated": -758.1578979492188, + "logps/oppo_gen": -84.30010223388672, + "logps/oppo_real": -478.0521545410156, + "logps/real": -439.0254821777344, + "loss": 0.3319, + "loss/gen": 0.048829175531864166, + "loss/real": 0.14225539565086365, + "rewards/accuracies": 1.0, + "rewards/generated": -673.8578491210938, + "rewards/margins": 712.884521484375, + "rewards/real": 39.02668762207031, + "step": 434 + }, + { + "epoch": 0.91, + "grad_norm": 32.42948894030998, + "learning_rate": 1.2235870926211616e-08, + "logits/generated": -0.18947938084602356, + "logits/oppo_generated": -2.709965467453003, + "logits/oppo_real": -2.865321159362793, + "logits/real": -1.8958816528320312, + "logps/generated": -598.7344970703125, + "logps/oppo_gen": -56.259098052978516, + "logps/oppo_real": -212.92286682128906, + "logps/real": -202.73770141601562, + "loss": 0.2694, + "loss/gen": 0.23359569907188416, + "loss/real": 0.22692254185676575, + "rewards/accuracies": 1.0, + "rewards/generated": -542.475341796875, + "rewards/margins": 552.6605224609375, + "rewards/real": 10.185166358947754, + "step": 435 + }, + { + "epoch": 0.91, + "grad_norm": 28.126094813760695, + "learning_rate": 1.1677799800364957e-08, + "logits/generated": -0.07881470024585724, + "logits/oppo_generated": -2.677896499633789, + "logits/oppo_real": -2.768991470336914, + "logits/real": -2.2843446731567383, + "logps/generated": -792.887939453125, + "logps/oppo_gen": -83.10468292236328, + "logps/oppo_real": -386.85968017578125, + "logps/real": -352.83319091796875, + "loss": 0.2329, + "loss/gen": 0.0062789106741547585, + "loss/real": 0.15011855959892273, + "rewards/accuracies": 1.0, + "rewards/generated": -709.7833251953125, + "rewards/margins": 743.8096923828125, + "rewards/real": 34.026451110839844, + "step": 436 + }, + { + "epoch": 0.91, + "grad_norm": 27.066375525401092, + "learning_rate": 1.1132449796060873e-08, + "logits/generated": 0.4625094532966614, + "logits/oppo_generated": -2.781583309173584, + "logits/oppo_real": -2.9280033111572266, + "logits/real": -1.8447434902191162, + "logps/generated": -665.0208129882812, + "logps/oppo_gen": -66.32365417480469, + "logps/oppo_real": -339.19329833984375, + "logps/real": -353.3502197265625, + "loss": 0.3796, + "loss/gen": 0.04067763313651085, + "loss/real": 0.3821491003036499, + "rewards/accuracies": 1.0, + "rewards/generated": -598.6971435546875, + "rewards/margins": 584.5401611328125, + "rewards/real": -14.156988143920898, + "step": 437 + }, + { + "epoch": 0.92, + "grad_norm": 38.24305173308447, + "learning_rate": 1.0599850022898537e-08, + "logits/generated": 0.2761492133140564, + "logits/oppo_generated": -2.4901785850524902, + "logits/oppo_real": -2.6094114780426025, + "logits/real": -1.6084918975830078, + "logps/generated": -875.5096435546875, + "logps/oppo_gen": -53.723915100097656, + "logps/oppo_real": -182.81817626953125, + "logps/real": -147.58262634277344, + "loss": 0.3627, + "loss/gen": 0.16688652336597443, + "loss/real": 0.15126757323741913, + "rewards/accuracies": 1.0, + "rewards/generated": -821.7857055664062, + "rewards/margins": 857.021240234375, + "rewards/real": 35.23555374145508, + "step": 438 + }, + { + "epoch": 0.92, + "grad_norm": 23.49628695109679, + "learning_rate": 1.0080028909897232e-08, + "logits/generated": 0.6079811453819275, + "logits/oppo_generated": -2.515484571456909, + "logits/oppo_real": -2.485753297805786, + "logits/real": -1.7542835474014282, + "logps/generated": -616.97265625, + "logps/oppo_gen": -80.76638793945312, + "logps/oppo_real": -202.42733764648438, + "logps/real": -179.23358154296875, + "loss": 0.2542, + "loss/gen": 0.20540261268615723, + "loss/real": 0.18645432591438293, + "rewards/accuracies": 1.0, + "rewards/generated": -536.206298828125, + "rewards/margins": 559.4000244140625, + "rewards/real": 23.193754196166992, + "step": 439 + }, + { + "epoch": 0.92, + "grad_norm": 23.16906564856131, + "learning_rate": 9.57301420397924e-09, + "logits/generated": 0.6640872359275818, + "logits/oppo_generated": -2.4124038219451904, + "logits/oppo_real": -2.5056729316711426, + "logits/real": -1.7653595209121704, + "logps/generated": -810.7935180664062, + "logps/oppo_gen": -60.34915542602539, + "logps/oppo_real": -184.50430297851562, + "logps/real": -151.20101928710938, + "loss": 0.2569, + "loss/gen": 0.09386168420314789, + "loss/real": 0.14957281947135925, + "rewards/accuracies": 1.0, + "rewards/generated": -750.4443969726562, + "rewards/margins": 783.7476806640625, + "rewards/real": 33.30329132080078, + "step": 440 + }, + { + "epoch": 0.92, + "grad_norm": 17.140997994745213, + "learning_rate": 9.078832968488632e-09, + "logits/generated": 0.8487217426300049, + "logits/oppo_generated": -2.730281352996826, + "logits/oppo_real": -2.6713297367095947, + "logits/real": -2.253817081451416, + "logps/generated": -688.5202026367188, + "logps/oppo_gen": -72.50233459472656, + "logps/oppo_real": -303.8031921386719, + "logps/real": -273.73455810546875, + "loss": 0.2646, + "loss/gen": 0.04261288791894913, + "loss/real": 0.1547403186559677, + "rewards/accuracies": 1.0, + "rewards/generated": -616.017822265625, + "rewards/margins": 646.08642578125, + "rewards/real": 30.06861686706543, + "step": 441 + }, + { + "epoch": 0.92, + "grad_norm": 41.81041690672389, + "learning_rate": 8.597511581746625e-09, + "logits/generated": 0.5856022834777832, + "logits/oppo_generated": -2.798659324645996, + "logits/oppo_real": -2.809553384780884, + "logits/real": -1.9602468013763428, + "logps/generated": -650.50244140625, + "logps/oppo_gen": -71.67813110351562, + "logps/oppo_real": -249.2584228515625, + "logps/real": -257.9971008300781, + "loss": 0.3944, + "loss/gen": 0.13761942088603973, + "loss/real": 0.3325386047363281, + "rewards/accuracies": 1.0, + "rewards/generated": -578.82421875, + "rewards/margins": 570.0855712890625, + "rewards/real": -8.738664627075195, + "step": 442 + }, + { + "epoch": 0.93, + "grad_norm": 30.877363230343903, + "learning_rate": 8.129075735643698e-09, + "logits/generated": -0.38423842191696167, + "logits/oppo_generated": -2.6113674640655518, + "logits/oppo_real": -2.7933380603790283, + "logits/real": -1.9059512615203857, + "logps/generated": -570.1771240234375, + "logps/oppo_gen": -70.2498779296875, + "logps/oppo_real": -249.642822265625, + "logps/real": -235.60647583007812, + "loss": 0.3212, + "loss/gen": 0.09656841307878494, + "loss/real": 0.22677713632583618, + "rewards/accuracies": 0.9375, + "rewards/generated": -499.92724609375, + "rewards/margins": 513.963623046875, + "rewards/real": 14.036325454711914, + "step": 443 + }, + { + "epoch": 0.93, + "grad_norm": 32.42138930359092, + "learning_rate": 7.673550434268123e-09, + "logits/generated": 0.3789711892604828, + "logits/oppo_generated": -2.5773496627807617, + "logits/oppo_real": -2.756646156311035, + "logits/real": -1.5774751901626587, + "logps/generated": -713.83984375, + "logps/oppo_gen": -67.772705078125, + "logps/oppo_real": -219.92550659179688, + "logps/real": -217.2390594482422, + "loss": 0.2387, + "loss/gen": 0.13536104559898376, + "loss/real": 0.26523107290267944, + "rewards/accuracies": 0.9375, + "rewards/generated": -646.0671997070312, + "rewards/margins": 648.753662109375, + "rewards/real": 2.686431884765625, + "step": 444 + }, + { + "epoch": 0.93, + "grad_norm": 35.38876581761153, + "learning_rate": 7.230959992571367e-09, + "logits/generated": -0.14755092561244965, + "logits/oppo_generated": -2.5365023612976074, + "logits/oppo_real": -2.726724624633789, + "logits/real": -1.5336180925369263, + "logps/generated": -655.31982421875, + "logps/oppo_gen": -64.62477111816406, + "logps/oppo_real": -251.58450317382812, + "logps/real": -238.06442260742188, + "loss": 0.3278, + "loss/gen": 0.14248934388160706, + "loss/real": 0.23805347084999084, + "rewards/accuracies": 0.9375, + "rewards/generated": -590.695068359375, + "rewards/margins": 604.215087890625, + "rewards/real": 13.520094871520996, + "step": 445 + }, + { + "epoch": 0.93, + "grad_norm": 20.637570890862875, + "learning_rate": 6.801328035070136e-09, + "logits/generated": -0.11328282952308655, + "logits/oppo_generated": -2.7569596767425537, + "logits/oppo_real": -2.9033079147338867, + "logits/real": -1.7462917566299438, + "logps/generated": -716.6192626953125, + "logps/oppo_gen": -84.91046142578125, + "logps/oppo_real": -289.9277038574219, + "logps/real": -317.48663330078125, + "loss": 0.3271, + "loss/gen": 0.16633838415145874, + "loss/real": 0.4990673363208771, + "rewards/accuracies": 1.0, + "rewards/generated": -631.7088623046875, + "rewards/margins": 604.1499633789062, + "rewards/real": -27.558929443359375, + "step": 446 + }, + { + "epoch": 0.94, + "grad_norm": 78.71668442109335, + "learning_rate": 6.38467749458535e-09, + "logits/generated": 1.1556400060653687, + "logits/oppo_generated": -2.6448044776916504, + "logits/oppo_real": -2.7699906826019287, + "logits/real": -1.9819279909133911, + "logps/generated": -715.257568359375, + "logps/oppo_gen": -44.87987518310547, + "logps/oppo_real": -182.60540771484375, + "logps/real": -185.8890380859375, + "loss": 0.3929, + "loss/gen": 0.1545814424753189, + "loss/real": 0.30889979004859924, + "rewards/accuracies": 0.9375, + "rewards/generated": -670.377685546875, + "rewards/margins": 667.093994140625, + "rewards/real": -3.2836532592773438, + "step": 447 + }, + { + "epoch": 0.94, + "grad_norm": 45.8729403914361, + "learning_rate": 5.981030611018234e-09, + "logits/generated": -0.1802346408367157, + "logits/oppo_generated": -2.7766942977905273, + "logits/oppo_real": -2.8434033393859863, + "logits/real": -2.057481527328491, + "logps/generated": -569.3570556640625, + "logps/oppo_gen": -67.70870971679688, + "logps/oppo_real": -264.5067138671875, + "logps/real": -242.7213134765625, + "loss": 0.3662, + "loss/gen": 0.2699339985847473, + "loss/real": 0.1798723340034485, + "rewards/accuracies": 0.9375, + "rewards/generated": -501.6483154296875, + "rewards/margins": 523.4337768554688, + "rewards/real": 21.785402297973633, + "step": 448 + }, + { + "epoch": 0.94, + "grad_norm": 47.73978079046734, + "learning_rate": 5.590408930162799e-09, + "logits/generated": -0.4504716992378235, + "logits/oppo_generated": -2.562833309173584, + "logits/oppo_real": -2.5892672538757324, + "logits/real": -1.9565715789794922, + "logps/generated": -655.5435791015625, + "logps/oppo_gen": -69.748779296875, + "logps/oppo_real": -302.15716552734375, + "logps/real": -293.0556640625, + "loss": 0.3746, + "loss/gen": 0.044189054518938065, + "loss/real": 0.27661794424057007, + "rewards/accuracies": 1.0, + "rewards/generated": -585.7947998046875, + "rewards/margins": 594.8963012695312, + "rewards/real": 9.101491928100586, + "step": 449 + }, + { + "epoch": 0.94, + "grad_norm": 36.17708183366923, + "learning_rate": 5.212833302556258e-09, + "logits/generated": 0.7933423519134521, + "logits/oppo_generated": -2.8123812675476074, + "logits/oppo_real": -2.7088489532470703, + "logits/real": -2.274021625518799, + "logps/generated": -740.898681640625, + "logps/oppo_gen": -99.06866455078125, + "logps/oppo_real": -265.1106262207031, + "logps/real": -246.4234619140625, + "loss": 0.2583, + "loss/gen": 0.02867821604013443, + "loss/real": 0.17321571707725525, + "rewards/accuracies": 1.0, + "rewards/generated": -641.8300170898438, + "rewards/margins": 660.5171508789062, + "rewards/real": 18.687145233154297, + "step": 450 + }, + { + "epoch": 0.94, + "grad_norm": 32.52973962331911, + "learning_rate": 4.848323882365668e-09, + "logits/generated": -0.29569417238235474, + "logits/oppo_generated": -2.724055290222168, + "logits/oppo_real": -2.7838587760925293, + "logits/real": -1.9440395832061768, + "logps/generated": -734.2264404296875, + "logps/oppo_gen": -72.73839569091797, + "logps/oppo_real": -309.91082763671875, + "logps/real": -293.07879638671875, + "loss": 0.2935, + "loss/gen": 0.03302386775612831, + "loss/real": 0.19048979878425598, + "rewards/accuracies": 1.0, + "rewards/generated": -661.488037109375, + "rewards/margins": 678.320068359375, + "rewards/real": 16.832040786743164, + "step": 451 + }, + { + "epoch": 0.95, + "grad_norm": 22.549430295256354, + "learning_rate": 4.496900126312431e-09, + "logits/generated": 0.7788766622543335, + "logits/oppo_generated": -2.452781915664673, + "logits/oppo_real": -2.67744779586792, + "logits/real": -1.6308937072753906, + "logps/generated": -589.6851806640625, + "logps/oppo_gen": -52.5155029296875, + "logps/oppo_real": -223.33140563964844, + "logps/real": -205.42462158203125, + "loss": 0.3002, + "loss/gen": 0.08183200657367706, + "loss/real": 0.2042657434940338, + "rewards/accuracies": 1.0, + "rewards/generated": -537.169677734375, + "rewards/margins": 555.076416015625, + "rewards/real": 17.906780242919922, + "step": 452 + }, + { + "epoch": 0.95, + "grad_norm": 34.657315484635596, + "learning_rate": 4.158580792633482e-09, + "logits/generated": 1.6620922088623047, + "logits/oppo_generated": -2.679365634918213, + "logits/oppo_real": -2.71950101852417, + "logits/real": -2.00820255279541, + "logps/generated": -671.602294921875, + "logps/oppo_gen": -63.20152282714844, + "logps/oppo_real": -247.9625244140625, + "logps/real": -214.57554626464844, + "loss": 0.2825, + "loss/gen": 0.035797297954559326, + "loss/real": 0.15221944451332092, + "rewards/accuracies": 1.0, + "rewards/generated": -608.4007568359375, + "rewards/margins": 641.7877197265625, + "rewards/real": 33.38697814941406, + "step": 453 + }, + { + "epoch": 0.95, + "grad_norm": 31.58702249713282, + "learning_rate": 3.833383940080231e-09, + "logits/generated": 0.5262564420700073, + "logits/oppo_generated": -2.853964328765869, + "logits/oppo_real": -2.8066015243530273, + "logits/real": -2.2472078800201416, + "logps/generated": -775.4197998046875, + "logps/oppo_gen": -89.24186706542969, + "logps/oppo_real": -313.77490234375, + "logps/real": -293.4193420410156, + "loss": 0.293, + "loss/gen": 0.056602317839860916, + "loss/real": 0.16948629915714264, + "rewards/accuracies": 1.0, + "rewards/generated": -686.177978515625, + "rewards/margins": 706.533447265625, + "rewards/real": 20.35555648803711, + "step": 454 + }, + { + "epoch": 0.95, + "grad_norm": 24.27373169277787, + "learning_rate": 3.521326926954532e-09, + "logits/generated": -0.06063704192638397, + "logits/oppo_generated": -2.945572853088379, + "logits/oppo_real": -2.857299327850342, + "logits/real": -2.3750839233398438, + "logps/generated": -742.7252197265625, + "logps/oppo_gen": -75.34283447265625, + "logps/oppo_real": -377.9549560546875, + "logps/real": -361.899658203125, + "loss": 0.2313, + "loss/gen": 0.11743300408124924, + "loss/real": 0.24999423325061798, + "rewards/accuracies": 1.0, + "rewards/generated": -667.38232421875, + "rewards/margins": 683.4376220703125, + "rewards/real": 16.055313110351562, + "step": 455 + }, + { + "epoch": 0.95, + "grad_norm": 19.96226374630275, + "learning_rate": 3.2224264101821108e-09, + "logits/generated": 0.8330753445625305, + "logits/oppo_generated": -2.574817657470703, + "logits/oppo_real": -2.46460223197937, + "logits/real": -1.7454832792282104, + "logps/generated": -636.5455322265625, + "logps/oppo_gen": -70.7989501953125, + "logps/oppo_real": -374.28759765625, + "logps/real": -379.6514587402344, + "loss": 0.3385, + "loss/gen": 0.19702856242656708, + "loss/real": 0.40760496258735657, + "rewards/accuracies": 0.9375, + "rewards/generated": -565.74658203125, + "rewards/margins": 560.3827514648438, + "rewards/real": -5.363855361938477, + "step": 456 + }, + { + "epoch": 0.96, + "grad_norm": 27.184910302307994, + "learning_rate": 2.936698344423505e-09, + "logits/generated": -0.41058021783828735, + "logits/oppo_generated": -2.9557366371154785, + "logits/oppo_real": -2.8256478309631348, + "logits/real": -2.5244743824005127, + "logps/generated": -757.3851318359375, + "logps/oppo_gen": -91.42108154296875, + "logps/oppo_real": -355.4400634765625, + "logps/real": -349.91094970703125, + "loss": 0.2823, + "loss/gen": 0.01450443733483553, + "loss/real": 0.27503716945648193, + "rewards/accuracies": 1.0, + "rewards/generated": -665.9639892578125, + "rewards/margins": 671.4931640625, + "rewards/real": 5.529134750366211, + "step": 457 + }, + { + "epoch": 0.96, + "grad_norm": 34.244138341359786, + "learning_rate": 2.664157981222437e-09, + "logits/generated": -0.20761600136756897, + "logits/oppo_generated": -2.706226348876953, + "logits/oppo_real": -2.7390899658203125, + "logits/real": -2.1303887367248535, + "logps/generated": -750.4144287109375, + "logps/oppo_gen": -83.41449737548828, + "logps/oppo_real": -231.79632568359375, + "logps/real": -206.0238037109375, + "loss": 0.2741, + "loss/gen": 0.05449531227350235, + "loss/real": 0.16203711926937103, + "rewards/accuracies": 1.0, + "rewards/generated": -666.9998779296875, + "rewards/margins": 692.7724609375, + "rewards/real": 25.772523880004883, + "step": 458 + }, + { + "epoch": 0.96, + "grad_norm": 48.74286055493794, + "learning_rate": 2.4048198681917154e-09, + "logits/generated": 0.6840654015541077, + "logits/oppo_generated": -2.736264705657959, + "logits/oppo_real": -2.906867742538452, + "logits/real": -1.9091205596923828, + "logps/generated": -722.01708984375, + "logps/oppo_gen": -76.57756042480469, + "logps/oppo_real": -386.65283203125, + "logps/real": -359.4886474609375, + "loss": 0.3591, + "loss/gen": 0.03984036296606064, + "loss/real": 0.1646542102098465, + "rewards/accuracies": 1.0, + "rewards/generated": -645.439453125, + "rewards/margins": 672.6036376953125, + "rewards/real": 27.164186477661133, + "step": 459 + }, + { + "epoch": 0.96, + "grad_norm": 33.56559139626844, + "learning_rate": 2.158697848236607e-09, + "logits/generated": -0.3475767970085144, + "logits/oppo_generated": -2.688716411590576, + "logits/oppo_real": -2.695068359375, + "logits/real": -1.9637393951416016, + "logps/generated": -757.3863525390625, + "logps/oppo_gen": -84.60350036621094, + "logps/oppo_real": -197.73443603515625, + "logps/real": -187.57839965820312, + "loss": 0.2813, + "loss/gen": 0.021964294835925102, + "loss/real": 0.23046338558197021, + "rewards/accuracies": 1.0, + "rewards/generated": -672.782958984375, + "rewards/margins": 682.93896484375, + "rewards/real": 10.156058311462402, + "step": 460 + }, + { + "epoch": 0.96, + "grad_norm": 20.376350217675377, + "learning_rate": 1.9258050588161766e-09, + "logits/generated": 0.5216965675354004, + "logits/oppo_generated": -2.673025369644165, + "logits/oppo_real": -2.726318836212158, + "logits/real": -2.057188034057617, + "logps/generated": -774.2880249023438, + "logps/oppo_gen": -65.60742950439453, + "logps/oppo_real": -229.56605529785156, + "logps/real": -203.36492919921875, + "loss": 0.2584, + "loss/gen": 0.03890099376440048, + "loss/real": 0.16776156425476074, + "rewards/accuracies": 1.0, + "rewards/generated": -708.6805419921875, + "rewards/margins": 734.8817138671875, + "rewards/real": 26.201156616210938, + "step": 461 + }, + { + "epoch": 0.97, + "grad_norm": 26.108272764361132, + "learning_rate": 1.7061539312417107e-09, + "logits/generated": 1.1993634700775146, + "logits/oppo_generated": -2.521968126296997, + "logits/oppo_real": -2.3472464084625244, + "logits/real": -1.8873664140701294, + "logps/generated": -759.1806640625, + "logps/oppo_gen": -79.24931335449219, + "logps/oppo_real": -204.78036499023438, + "logps/real": -183.12274169921875, + "loss": 0.264, + "loss/gen": 0.04187949746847153, + "loss/real": 0.1729775071144104, + "rewards/accuracies": 1.0, + "rewards/generated": -679.9313354492188, + "rewards/margins": 701.5889892578125, + "rewards/real": 21.657615661621094, + "step": 462 + }, + { + "epoch": 0.97, + "grad_norm": 31.69596530015443, + "learning_rate": 1.4997561900135236e-09, + "logits/generated": 0.6135013699531555, + "logits/oppo_generated": -2.7849419116973877, + "logits/oppo_real": -2.7172937393188477, + "logits/real": -2.012725591659546, + "logps/generated": -804.5126953125, + "logps/oppo_gen": -67.21133422851562, + "logps/oppo_real": -265.61785888671875, + "logps/real": -281.4776611328125, + "loss": 0.3281, + "loss/gen": 0.07727505266666412, + "loss/real": 0.3425220847129822, + "rewards/accuracies": 1.0, + "rewards/generated": -737.3013916015625, + "rewards/margins": 721.441650390625, + "rewards/real": -15.859792709350586, + "step": 463 + }, + { + "epoch": 0.97, + "grad_norm": 46.92561590149217, + "learning_rate": 1.3066228521948219e-09, + "logits/generated": -0.23002564907073975, + "logits/oppo_generated": -2.977627754211426, + "logits/oppo_real": -2.8772168159484863, + "logits/real": -2.613190174102783, + "logps/generated": -731.850341796875, + "logps/oppo_gen": -94.8328857421875, + "logps/oppo_real": -499.3686828613281, + "logps/real": -466.97076416015625, + "loss": 0.2805, + "loss/gen": 0.03973410278558731, + "loss/real": 0.1506689488887787, + "rewards/accuracies": 1.0, + "rewards/generated": -637.017333984375, + "rewards/margins": 669.4153442382812, + "rewards/real": 32.39790725708008, + "step": 464 + }, + { + "epoch": 0.97, + "grad_norm": 30.14508881052652, + "learning_rate": 1.126764226823812e-09, + "logits/generated": 0.786093533039093, + "logits/oppo_generated": -2.6228816509246826, + "logits/oppo_real": -2.6832354068756104, + "logits/real": -1.7279415130615234, + "logps/generated": -825.026611328125, + "logps/oppo_gen": -65.4859619140625, + "logps/oppo_real": -178.48684692382812, + "logps/real": -175.68511962890625, + "loss": 0.3055, + "loss/gen": 0.07098890841007233, + "loss/real": 0.29667070508003235, + "rewards/accuracies": 1.0, + "rewards/generated": -759.540771484375, + "rewards/margins": 762.3424072265625, + "rewards/real": 2.8017330169677734, + "step": 465 + }, + { + "epoch": 0.97, + "grad_norm": 27.821471955268876, + "learning_rate": 9.60189914363363e-10, + "logits/generated": 0.8292776346206665, + "logits/oppo_generated": -2.7453532218933105, + "logits/oppo_real": -2.565095901489258, + "logits/real": -2.1934657096862793, + "logps/generated": -717.3662109375, + "logps/oppo_gen": -77.26994323730469, + "logps/oppo_real": -337.29949951171875, + "logps/real": -320.40087890625, + "loss": 0.2969, + "loss/gen": 0.027588829398155212, + "loss/real": 0.21265217661857605, + "rewards/accuracies": 1.0, + "rewards/generated": -640.0963134765625, + "rewards/margins": 656.9949951171875, + "rewards/real": 16.89865493774414, + "step": 466 + }, + { + "epoch": 0.98, + "grad_norm": 23.094122650071867, + "learning_rate": 8.069088061885276e-10, + "logits/generated": 0.2578420042991638, + "logits/oppo_generated": -2.66814923286438, + "logits/oppo_real": -2.721468448638916, + "logits/real": -1.9464046955108643, + "logps/generated": -617.5042724609375, + "logps/oppo_gen": -68.08262634277344, + "logps/oppo_real": -245.5079345703125, + "logps/real": -245.2777099609375, + "loss": 0.2767, + "loss/gen": 0.13515013456344604, + "loss/real": 0.2663593292236328, + "rewards/accuracies": 1.0, + "rewards/generated": -549.421630859375, + "rewards/margins": 549.65185546875, + "rewards/real": 0.23020458221435547, + "step": 467 + }, + { + "epoch": 0.98, + "grad_norm": 32.73517675032472, + "learning_rate": 6.66929084112089e-10, + "logits/generated": -0.0217684805393219, + "logits/oppo_generated": -2.8137381076812744, + "logits/oppo_real": -2.701899528503418, + "logits/real": -2.3723936080932617, + "logps/generated": -732.671875, + "logps/oppo_gen": -95.51661682128906, + "logps/oppo_real": -365.8069152832031, + "logps/real": -337.0956115722656, + "loss": 0.3006, + "loss/gen": 0.017105087637901306, + "loss/real": 0.17143797874450684, + "rewards/accuracies": 1.0, + "rewards/generated": -637.1552124023438, + "rewards/margins": 665.8665161132812, + "rewards/real": 28.711307525634766, + "step": 468 + }, + { + "epoch": 0.98, + "grad_norm": 35.94339965161372, + "learning_rate": 5.402582199476036e-10, + "logits/generated": 0.8715763092041016, + "logits/oppo_generated": -2.877156972885132, + "logits/oppo_real": -2.714019298553467, + "logits/real": -2.1983542442321777, + "logps/generated": -741.3756103515625, + "logps/oppo_gen": -62.887359619140625, + "logps/oppo_real": -226.98411560058594, + "logps/real": -201.53179931640625, + "loss": 0.2849, + "loss/gen": 0.026809904724359512, + "loss/real": 0.16713371872901917, + "rewards/accuracies": 1.0, + "rewards/generated": -678.4882202148438, + "rewards/margins": 703.9405517578125, + "rewards/real": 25.452287673950195, + "step": 469 + }, + { + "epoch": 0.98, + "grad_norm": 48.31405641089351, + "learning_rate": 4.269029751107489e-10, + "logits/generated": 0.46310731768608093, + "logits/oppo_generated": -2.614527702331543, + "logits/oppo_real": -2.721973419189453, + "logits/real": -1.800484299659729, + "logps/generated": -641.4134521484375, + "logps/oppo_gen": -52.66883087158203, + "logps/oppo_real": -169.48345947265625, + "logps/real": -150.42047119140625, + "loss": 0.3255, + "loss/gen": 0.054055292159318924, + "loss/real": 0.17102661728858948, + "rewards/accuracies": 1.0, + "rewards/generated": -588.7446899414062, + "rewards/margins": 607.8077392578125, + "rewards/real": 19.062992095947266, + "step": 470 + }, + { + "epoch": 0.99, + "grad_norm": 27.930841322320678, + "learning_rate": 3.2686940025836164e-10, + "logits/generated": 1.4157593250274658, + "logits/oppo_generated": -2.5730538368225098, + "logits/oppo_real": -2.5419564247131348, + "logits/real": -1.729305386543274, + "logps/generated": -710.6050415039062, + "logps/oppo_gen": -77.87423706054688, + "logps/oppo_real": -270.4675598144531, + "logps/real": -268.3628234863281, + "loss": 0.3195, + "loss/gen": 0.027144107967615128, + "loss/real": 0.2538290023803711, + "rewards/accuracies": 1.0, + "rewards/generated": -632.7308349609375, + "rewards/margins": 634.8355102539062, + "rewards/real": 2.104731559753418, + "step": 471 + }, + { + "epoch": 0.99, + "grad_norm": 39.48121833938892, + "learning_rate": 2.4016283496544607e-10, + "logits/generated": -0.10758259892463684, + "logits/oppo_generated": -2.6745200157165527, + "logits/oppo_real": -2.645476818084717, + "logits/real": -1.9466335773468018, + "logps/generated": -801.3179321289062, + "logps/oppo_gen": -101.33052825927734, + "logps/oppo_real": -358.234375, + "logps/real": -353.99456787109375, + "loss": 0.3131, + "loss/gen": 0.04514200612902641, + "loss/real": 0.2479880452156067, + "rewards/accuracies": 1.0, + "rewards/generated": -699.9874267578125, + "rewards/margins": 704.227294921875, + "rewards/real": 4.239832878112793, + "step": 472 + }, + { + "epoch": 0.99, + "grad_norm": 24.568082917521007, + "learning_rate": 1.6678790744015236e-10, + "logits/generated": 0.45618465542793274, + "logits/oppo_generated": -2.753333330154419, + "logits/oppo_real": -2.8271706104278564, + "logits/real": -2.0780029296875, + "logps/generated": -710.4241943359375, + "logps/oppo_gen": -72.44290924072266, + "logps/oppo_real": -266.6786193847656, + "logps/real": -258.2034912109375, + "loss": 0.2472, + "loss/gen": 0.012053638696670532, + "loss/real": 0.21844035387039185, + "rewards/accuracies": 1.0, + "rewards/generated": -637.9812622070312, + "rewards/margins": 646.4563598632812, + "rewards/real": 8.475127220153809, + "step": 473 + }, + { + "epoch": 0.99, + "grad_norm": 30.222697397622888, + "learning_rate": 1.0674853427683484e-10, + "logits/generated": -0.06531578302383423, + "logits/oppo_generated": -2.4703428745269775, + "logits/oppo_real": -2.778745174407959, + "logits/real": -1.5166780948638916, + "logps/generated": -741.3477783203125, + "logps/oppo_gen": -59.79944610595703, + "logps/oppo_real": -181.95498657226562, + "logps/real": -220.44729614257812, + "loss": 0.3104, + "loss/gen": 0.060335446149110794, + "loss/real": 0.39393651485443115, + "rewards/accuracies": 1.0, + "rewards/generated": -681.5482788085938, + "rewards/margins": 643.055908203125, + "rewards/real": -38.49231719970703, + "step": 474 + }, + { + "epoch": 0.99, + "grad_norm": 30.308133343664434, + "learning_rate": 6.004792024680294e-11, + "logits/generated": 0.3023368716239929, + "logits/oppo_generated": -2.767500877380371, + "logits/oppo_real": -2.7556655406951904, + "logits/real": -2.2175793647766113, + "logps/generated": -747.4019775390625, + "logps/oppo_gen": -77.92488098144531, + "logps/oppo_real": -223.17697143554688, + "logps/real": -190.78720092773438, + "loss": 0.2788, + "loss/gen": 0.049265552312135696, + "loss/real": 0.15117153525352478, + "rewards/accuracies": 1.0, + "rewards/generated": -669.4771118164062, + "rewards/margins": 701.8668823242188, + "rewards/real": 32.3897590637207, + "step": 475 + }, + { + "epoch": 1.0, + "grad_norm": 24.058693641719074, + "learning_rate": 2.6688558127485604e-11, + "logits/generated": -0.8989108800888062, + "logits/oppo_generated": -2.83430814743042, + "logits/oppo_real": -3.1122868061065674, + "logits/real": -2.3030447959899902, + "logps/generated": -717.5481567382812, + "logps/oppo_gen": -86.60485076904297, + "logps/oppo_real": -374.5808410644531, + "logps/real": -337.60858154296875, + "loss": 0.2793, + "loss/gen": 0.09490139782428741, + "loss/real": 0.14539852738380432, + "rewards/accuracies": 1.0, + "rewards/generated": -630.943359375, + "rewards/margins": 667.91552734375, + "rewards/real": 36.972225189208984, + "step": 476 + }, + { + "epoch": 1.0, + "grad_norm": 26.022392606987303, + "learning_rate": 6.672228569148952e-12, + "logits/generated": 0.45034003257751465, + "logits/oppo_generated": -2.839545488357544, + "logits/oppo_real": -2.7138943672180176, + "logits/real": -2.2914600372314453, + "logps/generated": -687.2457885742188, + "logps/oppo_gen": -68.36036682128906, + "logps/oppo_real": -309.4075622558594, + "logps/real": -309.28704833984375, + "loss": 0.2662, + "loss/gen": 0.031270068138837814, + "loss/real": 0.27253997325897217, + "rewards/accuracies": 1.0, + "rewards/generated": -618.8853759765625, + "rewards/margins": 619.0059204101562, + "rewards/real": 0.1204981803894043, + "step": 477 + }, + { + "epoch": 1.0, + "grad_norm": 25.07323136638642, + "learning_rate": 0.0, + "logits/generated": -0.01666666567325592, + "logits/oppo_generated": -2.699960231781006, + "logits/oppo_real": -2.9887475967407227, + "logits/real": -2.0640759468078613, + "logps/generated": -709.115234375, + "logps/oppo_gen": -71.9469223022461, + "logps/oppo_real": -258.2105712890625, + "logps/real": -262.96441650390625, + "loss": 0.2512, + "loss/gen": 0.017528638243675232, + "loss/real": 0.30680006742477417, + "rewards/accuracies": 1.0, + "rewards/generated": -637.1682739257812, + "rewards/margins": 632.4144287109375, + "rewards/real": -4.753854274749756, + "step": 478 + }, + { + "epoch": 1.0, + "step": 478, + "total_flos": 0.0, + "train_loss": 0.5313476455012126, + "train_runtime": 13675.2672, + "train_samples_per_second": 4.47, + "train_steps_per_second": 0.035 + } + ], + "logging_steps": 1.0, + "max_steps": 478, + "num_input_tokens_seen": 0, + "num_train_epochs": 1, + "save_steps": 96, + "total_flos": 0.0, + "train_batch_size": 8, + "trial_name": null, + "trial_params": null +}