{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.9064422143088378, "eval_steps": 100, "global_step": 700, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": "2.5641e-07", "loss": 0.5987, "slid_loss": 0.5987, "step": 1, "time": 416.96 }, { "epoch": 0.0, "learning_rate": "5.1282e-07", "loss": 0.6236, "slid_loss": 0.6112, "step": 2, "time": 308.68 }, { "epoch": 0.0, "learning_rate": "7.6923e-07", "loss": 0.61, "slid_loss": 0.6108, "step": 3, "time": 303.89 }, { "epoch": 0.01, "learning_rate": "1.0256e-06", "loss": 0.6193, "slid_loss": 0.6129, "step": 4, "time": 300.55 }, { "epoch": 0.01, "learning_rate": "1.2821e-06", "loss": 0.62, "slid_loss": 0.6143, "step": 5, "time": 301.85 }, { "epoch": 0.01, "learning_rate": "1.5385e-06", "loss": 0.6036, "slid_loss": 0.6125, "step": 6, "time": 303.95 }, { "epoch": 0.01, "learning_rate": "1.7949e-06", "loss": 0.6116, "slid_loss": 0.6124, "step": 7, "time": 301.83 }, { "epoch": 0.01, "learning_rate": "2.0513e-06", "loss": 0.5796, "slid_loss": 0.6083, "step": 8, "time": 304.86 }, { "epoch": 0.01, "learning_rate": "2.3077e-06", "loss": 0.6255, "slid_loss": 0.6102, "step": 9, "time": 299.19 }, { "epoch": 0.01, "learning_rate": "2.5641e-06", "loss": 0.6125, "slid_loss": 0.6104, "step": 10, "time": 303.97 }, { "epoch": 0.01, "learning_rate": "2.8205e-06", "loss": 0.6187, "slid_loss": 0.6112, "step": 11, "time": 299.93 }, { "epoch": 0.02, "learning_rate": "3.0769e-06", "loss": 0.6229, "slid_loss": 0.6122, "step": 12, "time": 299.47 }, { "epoch": 0.02, "learning_rate": "3.3333e-06", "loss": 0.6, "slid_loss": 0.6112, "step": 13, "time": 297.05 }, { "epoch": 0.02, "learning_rate": "3.5897e-06", "loss": 0.6079, "slid_loss": 0.611, "step": 14, "time": 300.11 }, { "epoch": 0.02, "learning_rate": "3.8462e-06", "loss": 0.5676, "slid_loss": 0.6081, "step": 15, "time": 301.79 }, { "epoch": 0.02, "learning_rate": "4.1026e-06", "loss": 0.5754, "slid_loss": 0.606, "step": 16, "time": 294.41 }, { "epoch": 0.02, "learning_rate": "4.3590e-06", "loss": 0.5977, "slid_loss": 0.6056, "step": 17, "time": 299.96 }, { "epoch": 0.02, "learning_rate": "4.6154e-06", "loss": 0.5934, "slid_loss": 0.6049, "step": 18, "time": 300.94 }, { "epoch": 0.02, "learning_rate": "4.8718e-06", "loss": 0.6201, "slid_loss": 0.6057, "step": 19, "time": 299.48 }, { "epoch": 0.03, "learning_rate": "5.1282e-06", "loss": 0.6358, "slid_loss": 0.6072, "step": 20, "time": 303.69 }, { "epoch": 0.03, "learning_rate": "5.3846e-06", "loss": 0.6163, "slid_loss": 0.6076, "step": 21, "time": 302.21 }, { "epoch": 0.03, "learning_rate": "5.6410e-06", "loss": 0.5778, "slid_loss": 0.6063, "step": 22, "time": 299.39 }, { "epoch": 0.03, "learning_rate": "5.8974e-06", "loss": 0.6006, "slid_loss": 0.606, "step": 23, "time": 303.27 }, { "epoch": 0.03, "learning_rate": "6.1538e-06", "loss": 0.5979, "slid_loss": 0.6057, "step": 24, "time": 301.63 }, { "epoch": 0.03, "learning_rate": "6.4103e-06", "loss": 0.5862, "slid_loss": 0.6049, "step": 25, "time": 298.76 }, { "epoch": 0.03, "learning_rate": "6.6667e-06", "loss": 0.5932, "slid_loss": 0.6045, "step": 26, "time": 301.27 }, { "epoch": 0.03, "learning_rate": "6.9231e-06", "loss": 0.6079, "slid_loss": 0.6046, "step": 27, "time": 301.95 }, { "epoch": 0.04, "learning_rate": "7.1795e-06", "loss": 0.6066, "slid_loss": 0.6047, "step": 28, "time": 306.01 }, { "epoch": 0.04, "learning_rate": "7.4359e-06", "loss": 0.6065, "slid_loss": 0.6047, "step": 29, "time": 305.69 }, { "epoch": 0.04, "learning_rate": "7.6923e-06", "loss": 0.6172, "slid_loss": 0.6051, "step": 30, "time": 307.86 }, { "epoch": 0.04, "learning_rate": "7.9487e-06", "loss": 0.5885, "slid_loss": 0.6046, "step": 31, "time": 302.34 }, { "epoch": 0.04, "learning_rate": "8.2051e-06", "loss": 0.616, "slid_loss": 0.605, "step": 32, "time": 297.77 }, { "epoch": 0.04, "learning_rate": "8.4615e-06", "loss": 0.5881, "slid_loss": 0.6044, "step": 33, "time": 303.91 }, { "epoch": 0.04, "learning_rate": "8.7179e-06", "loss": 0.6276, "slid_loss": 0.6051, "step": 34, "time": 300.64 }, { "epoch": 0.05, "learning_rate": "8.9744e-06", "loss": 0.6095, "slid_loss": 0.6052, "step": 35, "time": 293.11 }, { "epoch": 0.05, "learning_rate": "9.2308e-06", "loss": 0.6124, "slid_loss": 0.6054, "step": 36, "time": 302.12 }, { "epoch": 0.05, "learning_rate": "9.4872e-06", "loss": 0.5959, "slid_loss": 0.6052, "step": 37, "time": 300.08 }, { "epoch": 0.05, "learning_rate": "9.7436e-06", "loss": 0.6022, "slid_loss": 0.6051, "step": 38, "time": 301.67 }, { "epoch": 0.05, "learning_rate": "1.0000e-05", "loss": 0.5919, "slid_loss": 0.6048, "step": 39, "time": 300.9 }, { "epoch": 0.05, "learning_rate": "1.0000e-05", "loss": 0.5992, "slid_loss": 0.6046, "step": 40, "time": 303.93 }, { "epoch": 0.05, "learning_rate": "1.0000e-05", "loss": 0.581, "slid_loss": 0.6041, "step": 41, "time": 303.43 }, { "epoch": 0.05, "learning_rate": "1.0000e-05", "loss": 0.5893, "slid_loss": 0.6037, "step": 42, "time": 296.02 }, { "epoch": 0.06, "learning_rate": "1.0000e-05", "loss": 0.6011, "slid_loss": 0.6036, "step": 43, "time": 296.37 }, { "epoch": 0.06, "learning_rate": "1.0000e-05", "loss": 0.619, "slid_loss": 0.604, "step": 44, "time": 332.28 }, { "epoch": 0.06, "learning_rate": "1.0000e-05", "loss": 0.5861, "slid_loss": 0.6036, "step": 45, "time": 299.29 }, { "epoch": 0.06, "learning_rate": "1.0000e-05", "loss": 0.5954, "slid_loss": 0.6034, "step": 46, "time": 303.55 }, { "epoch": 0.06, "learning_rate": "9.9999e-06", "loss": 0.6072, "slid_loss": 0.6035, "step": 47, "time": 304.38 }, { "epoch": 0.06, "learning_rate": "9.9999e-06", "loss": 0.5787, "slid_loss": 0.603, "step": 48, "time": 304.24 }, { "epoch": 0.06, "learning_rate": "9.9999e-06", "loss": 0.5957, "slid_loss": 0.6028, "step": 49, "time": 298.5 }, { "epoch": 0.06, "learning_rate": "9.9999e-06", "loss": 0.6328, "slid_loss": 0.6034, "step": 50, "time": 303.53 }, { "epoch": 0.07, "learning_rate": "9.9999e-06", "loss": 0.581, "slid_loss": 0.603, "step": 51, "time": 302.2 }, { "epoch": 0.07, "learning_rate": "9.9999e-06", "loss": 0.6062, "slid_loss": 0.6031, "step": 52, "time": 301.68 }, { "epoch": 0.07, "learning_rate": "9.9998e-06", "loss": 0.6003, "slid_loss": 0.603, "step": 53, "time": 296.0 }, { "epoch": 0.07, "learning_rate": "9.9998e-06", "loss": 0.6157, "slid_loss": 0.6032, "step": 54, "time": 299.54 }, { "epoch": 0.07, "learning_rate": "9.9998e-06", "loss": 0.604, "slid_loss": 0.6032, "step": 55, "time": 299.1 }, { "epoch": 0.07, "learning_rate": "9.9998e-06", "loss": 0.5752, "slid_loss": 0.6027, "step": 56, "time": 294.14 }, { "epoch": 0.07, "learning_rate": "9.9997e-06", "loss": 0.5931, "slid_loss": 0.6026, "step": 57, "time": 381.96 }, { "epoch": 0.08, "learning_rate": "9.9997e-06", "loss": 0.5937, "slid_loss": 0.6024, "step": 58, "time": 293.92 }, { "epoch": 0.08, "learning_rate": "9.9997e-06", "loss": 0.6232, "slid_loss": 0.6028, "step": 59, "time": 304.51 }, { "epoch": 0.08, "learning_rate": "9.9996e-06", "loss": 0.6031, "slid_loss": 0.6028, "step": 60, "time": 306.02 }, { "epoch": 0.08, "learning_rate": "9.9996e-06", "loss": 0.601, "slid_loss": 0.6028, "step": 61, "time": 301.96 }, { "epoch": 0.08, "learning_rate": "9.9996e-06", "loss": 0.6268, "slid_loss": 0.6031, "step": 62, "time": 302.5 }, { "epoch": 0.08, "learning_rate": "9.9995e-06", "loss": 0.6078, "slid_loss": 0.6032, "step": 63, "time": 302.51 }, { "epoch": 0.08, "learning_rate": "9.9995e-06", "loss": 0.6252, "slid_loss": 0.6036, "step": 64, "time": 297.93 }, { "epoch": 0.08, "learning_rate": "9.9994e-06", "loss": 0.6007, "slid_loss": 0.6035, "step": 65, "time": 302.92 }, { "epoch": 0.09, "learning_rate": "9.9994e-06", "loss": 0.5985, "slid_loss": 0.6034, "step": 66, "time": 298.35 }, { "epoch": 0.09, "learning_rate": "9.9993e-06", "loss": 0.5966, "slid_loss": 0.6033, "step": 67, "time": 300.99 }, { "epoch": 0.09, "learning_rate": "9.9993e-06", "loss": 0.5678, "slid_loss": 0.6028, "step": 68, "time": 296.29 }, { "epoch": 0.09, "learning_rate": "9.9992e-06", "loss": 0.5818, "slid_loss": 0.6025, "step": 69, "time": 294.95 }, { "epoch": 0.09, "learning_rate": "9.9992e-06", "loss": 0.6138, "slid_loss": 0.6027, "step": 70, "time": 298.16 }, { "epoch": 0.09, "learning_rate": "9.9991e-06", "loss": 0.6071, "slid_loss": 0.6027, "step": 71, "time": 302.87 }, { "epoch": 0.09, "learning_rate": "9.9991e-06", "loss": 0.6126, "slid_loss": 0.6029, "step": 72, "time": 302.98 }, { "epoch": 0.09, "learning_rate": "9.9990e-06", "loss": 0.5945, "slid_loss": 0.6028, "step": 73, "time": 429.67 }, { "epoch": 0.1, "learning_rate": "9.9990e-06", "loss": 0.5973, "slid_loss": 0.6027, "step": 74, "time": 303.2 }, { "epoch": 0.1, "learning_rate": "9.9989e-06", "loss": 0.6045, "slid_loss": 0.6027, "step": 75, "time": 298.66 }, { "epoch": 0.1, "learning_rate": "9.9988e-06", "loss": 0.5884, "slid_loss": 0.6025, "step": 76, "time": 305.59 }, { "epoch": 0.1, "learning_rate": "9.9988e-06", "loss": 0.6009, "slid_loss": 0.6025, "step": 77, "time": 300.33 }, { "epoch": 0.1, "learning_rate": "9.9987e-06", "loss": 0.6005, "slid_loss": 0.6025, "step": 78, "time": 300.6 }, { "epoch": 0.1, "learning_rate": "9.9986e-06", "loss": 0.5811, "slid_loss": 0.6022, "step": 79, "time": 304.71 }, { "epoch": 0.1, "learning_rate": "9.9986e-06", "loss": 0.5954, "slid_loss": 0.6021, "step": 80, "time": 295.84 }, { "epoch": 0.1, "learning_rate": "9.9985e-06", "loss": 0.5864, "slid_loss": 0.6019, "step": 81, "time": 307.04 }, { "epoch": 0.11, "learning_rate": "9.9984e-06", "loss": 0.6053, "slid_loss": 0.602, "step": 82, "time": 301.52 }, { "epoch": 0.11, "learning_rate": "9.9984e-06", "loss": 0.5725, "slid_loss": 0.6016, "step": 83, "time": 297.7 }, { "epoch": 0.11, "learning_rate": "9.9983e-06", "loss": 0.6035, "slid_loss": 0.6016, "step": 84, "time": 304.19 }, { "epoch": 0.11, "learning_rate": "9.9982e-06", "loss": 0.6073, "slid_loss": 0.6017, "step": 85, "time": 338.5 }, { "epoch": 0.11, "learning_rate": "9.9981e-06", "loss": 0.6044, "slid_loss": 0.6017, "step": 86, "time": 296.47 }, { "epoch": 0.11, "learning_rate": "9.9981e-06", "loss": 0.6035, "slid_loss": 0.6017, "step": 87, "time": 300.44 }, { "epoch": 0.11, "learning_rate": "9.9980e-06", "loss": 0.6143, "slid_loss": 0.6019, "step": 88, "time": 299.11 }, { "epoch": 0.12, "learning_rate": "9.9979e-06", "loss": 0.6097, "slid_loss": 0.602, "step": 89, "time": 299.75 }, { "epoch": 0.12, "learning_rate": "9.9978e-06", "loss": 0.5806, "slid_loss": 0.6017, "step": 90, "time": 301.43 }, { "epoch": 0.12, "learning_rate": "9.9977e-06", "loss": 0.5961, "slid_loss": 0.6017, "step": 91, "time": 304.29 }, { "epoch": 0.12, "learning_rate": "9.9976e-06", "loss": 0.6096, "slid_loss": 0.6018, "step": 92, "time": 303.86 }, { "epoch": 0.12, "learning_rate": "9.9975e-06", "loss": 0.625, "slid_loss": 0.602, "step": 93, "time": 302.51 }, { "epoch": 0.12, "learning_rate": "9.9974e-06", "loss": 0.5965, "slid_loss": 0.602, "step": 94, "time": 301.0 }, { "epoch": 0.12, "learning_rate": "9.9974e-06", "loss": 0.6001, "slid_loss": 0.6019, "step": 95, "time": 299.4 }, { "epoch": 0.12, "learning_rate": "9.9973e-06", "loss": 0.6154, "slid_loss": 0.6021, "step": 96, "time": 301.42 }, { "epoch": 0.13, "learning_rate": "9.9972e-06", "loss": 0.6055, "slid_loss": 0.6021, "step": 97, "time": 304.69 }, { "epoch": 0.13, "learning_rate": "9.9971e-06", "loss": 0.5986, "slid_loss": 0.6021, "step": 98, "time": 301.12 }, { "epoch": 0.13, "learning_rate": "9.9970e-06", "loss": 0.6033, "slid_loss": 0.6021, "step": 99, "time": 302.85 }, { "epoch": 0.13, "learning_rate": "9.9969e-06", "loss": 0.5926, "slid_loss": 0.602, "step": 100, "time": 298.85 }, { "epoch": 0.13, "learning_rate": "9.9968e-06", "loss": 0.5972, "slid_loss": 0.602, "step": 101, "time": 2115.7 }, { "epoch": 0.13, "learning_rate": "9.9966e-06", "loss": 0.6335, "slid_loss": 0.6021, "step": 102, "time": 297.71 }, { "epoch": 0.13, "learning_rate": "9.9965e-06", "loss": 0.6178, "slid_loss": 0.6022, "step": 103, "time": 296.19 }, { "epoch": 0.13, "learning_rate": "9.9964e-06", "loss": 0.5811, "slid_loss": 0.6018, "step": 104, "time": 297.39 }, { "epoch": 0.14, "learning_rate": "9.9963e-06", "loss": 0.6264, "slid_loss": 0.6018, "step": 105, "time": 302.43 }, { "epoch": 0.14, "learning_rate": "9.9962e-06", "loss": 0.5883, "slid_loss": 0.6017, "step": 106, "time": 297.74 }, { "epoch": 0.14, "learning_rate": "9.9961e-06", "loss": 0.5763, "slid_loss": 0.6013, "step": 107, "time": 295.11 }, { "epoch": 0.14, "learning_rate": "9.9960e-06", "loss": 0.5981, "slid_loss": 0.6015, "step": 108, "time": 297.78 }, { "epoch": 0.14, "learning_rate": "9.9959e-06", "loss": 0.5965, "slid_loss": 0.6012, "step": 109, "time": 303.32 }, { "epoch": 0.14, "learning_rate": "9.9957e-06", "loss": 0.5904, "slid_loss": 0.601, "step": 110, "time": 305.55 }, { "epoch": 0.14, "learning_rate": "9.9956e-06", "loss": 0.6257, "slid_loss": 0.6011, "step": 111, "time": 309.15 }, { "epoch": 0.15, "learning_rate": "9.9955e-06", "loss": 0.596, "slid_loss": 0.6008, "step": 112, "time": 295.49 }, { "epoch": 0.15, "learning_rate": "9.9954e-06", "loss": 0.6059, "slid_loss": 0.6009, "step": 113, "time": 297.05 }, { "epoch": 0.15, "learning_rate": "9.9952e-06", "loss": 0.5955, "slid_loss": 0.6007, "step": 114, "time": 298.43 }, { "epoch": 0.15, "learning_rate": "9.9951e-06", "loss": 0.5853, "slid_loss": 0.6009, "step": 115, "time": 298.88 }, { "epoch": 0.15, "learning_rate": "9.9950e-06", "loss": 0.5772, "slid_loss": 0.6009, "step": 116, "time": 303.47 }, { "epoch": 0.15, "learning_rate": "9.9949e-06", "loss": 0.6111, "slid_loss": 0.6011, "step": 117, "time": 297.58 }, { "epoch": 0.15, "learning_rate": "9.9947e-06", "loss": 0.5853, "slid_loss": 0.601, "step": 118, "time": 303.99 }, { "epoch": 0.15, "learning_rate": "9.9946e-06", "loss": 0.5996, "slid_loss": 0.6008, "step": 119, "time": 298.4 }, { "epoch": 0.16, "learning_rate": "9.9945e-06", "loss": 0.6205, "slid_loss": 0.6006, "step": 120, "time": 344.13 }, { "epoch": 0.16, "learning_rate": "9.9943e-06", "loss": 0.5966, "slid_loss": 0.6004, "step": 121, "time": 634.45 }, { "epoch": 0.16, "learning_rate": "9.9942e-06", "loss": 0.5996, "slid_loss": 0.6007, "step": 122, "time": 298.46 }, { "epoch": 0.16, "learning_rate": "9.9940e-06", "loss": 0.6048, "slid_loss": 0.6007, "step": 123, "time": 305.47 }, { "epoch": 0.16, "learning_rate": "9.9939e-06", "loss": 0.6087, "slid_loss": 0.6008, "step": 124, "time": 298.35 }, { "epoch": 0.16, "learning_rate": "9.9938e-06", "loss": 0.5717, "slid_loss": 0.6007, "step": 125, "time": 304.29 }, { "epoch": 0.16, "learning_rate": "9.9936e-06", "loss": 0.587, "slid_loss": 0.6006, "step": 126, "time": 299.88 }, { "epoch": 0.16, "learning_rate": "9.9935e-06", "loss": 0.5759, "slid_loss": 0.6003, "step": 127, "time": 298.54 }, { "epoch": 0.17, "learning_rate": "9.9933e-06", "loss": 0.5657, "slid_loss": 0.5999, "step": 128, "time": 297.77 }, { "epoch": 0.17, "learning_rate": "9.9932e-06", "loss": 0.6105, "slid_loss": 0.5999, "step": 129, "time": 295.44 }, { "epoch": 0.17, "learning_rate": "9.9930e-06", "loss": 0.5847, "slid_loss": 0.5996, "step": 130, "time": 304.95 }, { "epoch": 0.17, "learning_rate": "9.9929e-06", "loss": 0.58, "slid_loss": 0.5995, "step": 131, "time": 301.05 }, { "epoch": 0.17, "learning_rate": "9.9927e-06", "loss": 0.5847, "slid_loss": 0.5992, "step": 132, "time": 302.39 }, { "epoch": 0.17, "learning_rate": "9.9925e-06", "loss": 0.6222, "slid_loss": 0.5995, "step": 133, "time": 306.34 }, { "epoch": 0.17, "learning_rate": "9.9924e-06", "loss": 0.6114, "slid_loss": 0.5994, "step": 134, "time": 299.73 }, { "epoch": 0.17, "learning_rate": "9.9922e-06", "loss": 0.5823, "slid_loss": 0.5991, "step": 135, "time": 340.32 }, { "epoch": 0.18, "learning_rate": "9.9921e-06", "loss": 0.5989, "slid_loss": 0.599, "step": 136, "time": 302.77 }, { "epoch": 0.18, "learning_rate": "9.9919e-06", "loss": 0.5885, "slid_loss": 0.5989, "step": 137, "time": 300.49 }, { "epoch": 0.18, "learning_rate": "9.9917e-06", "loss": 0.6046, "slid_loss": 0.5989, "step": 138, "time": 300.88 }, { "epoch": 0.18, "learning_rate": "9.9916e-06", "loss": 0.6067, "slid_loss": 0.5991, "step": 139, "time": 302.46 }, { "epoch": 0.18, "learning_rate": "9.9914e-06", "loss": 0.5841, "slid_loss": 0.5989, "step": 140, "time": 299.23 }, { "epoch": 0.18, "learning_rate": "9.9912e-06", "loss": 0.5965, "slid_loss": 0.5991, "step": 141, "time": 304.79 }, { "epoch": 0.18, "learning_rate": "9.9910e-06", "loss": 0.6078, "slid_loss": 0.5992, "step": 142, "time": 303.42 }, { "epoch": 0.19, "learning_rate": "9.9909e-06", "loss": 0.5876, "slid_loss": 0.5991, "step": 143, "time": 300.34 }, { "epoch": 0.19, "learning_rate": "9.9907e-06", "loss": 0.5699, "slid_loss": 0.5986, "step": 144, "time": 301.76 }, { "epoch": 0.19, "learning_rate": "9.9905e-06", "loss": 0.594, "slid_loss": 0.5987, "step": 145, "time": 301.5 }, { "epoch": 0.19, "learning_rate": "9.9903e-06", "loss": 0.5813, "slid_loss": 0.5986, "step": 146, "time": 305.53 }, { "epoch": 0.19, "learning_rate": "9.9902e-06", "loss": 0.6037, "slid_loss": 0.5985, "step": 147, "time": 299.21 }, { "epoch": 0.19, "learning_rate": "9.9900e-06", "loss": 0.6152, "slid_loss": 0.5989, "step": 148, "time": 302.99 }, { "epoch": 0.19, "learning_rate": "9.9898e-06", "loss": 0.5807, "slid_loss": 0.5987, "step": 149, "time": 300.63 }, { "epoch": 0.19, "learning_rate": "9.9896e-06", "loss": 0.6118, "slid_loss": 0.5985, "step": 150, "time": 300.98 }, { "epoch": 0.2, "learning_rate": "9.9894e-06", "loss": 0.6146, "slid_loss": 0.5989, "step": 151, "time": 302.41 }, { "epoch": 0.2, "learning_rate": "9.9892e-06", "loss": 0.5905, "slid_loss": 0.5987, "step": 152, "time": 302.49 }, { "epoch": 0.2, "learning_rate": "9.9890e-06", "loss": 0.5948, "slid_loss": 0.5987, "step": 153, "time": 298.31 }, { "epoch": 0.2, "learning_rate": "9.9888e-06", "loss": 0.5909, "slid_loss": 0.5984, "step": 154, "time": 303.58 }, { "epoch": 0.2, "learning_rate": "9.9886e-06", "loss": 0.5935, "slid_loss": 0.5983, "step": 155, "time": 300.33 }, { "epoch": 0.2, "learning_rate": "9.9884e-06", "loss": 0.5986, "slid_loss": 0.5985, "step": 156, "time": 302.57 }, { "epoch": 0.2, "learning_rate": "9.9882e-06", "loss": 0.5854, "slid_loss": 0.5985, "step": 157, "time": 300.71 }, { "epoch": 0.2, "learning_rate": "9.9880e-06", "loss": 0.5969, "slid_loss": 0.5985, "step": 158, "time": 303.74 }, { "epoch": 0.21, "learning_rate": "9.9878e-06", "loss": 0.5823, "slid_loss": 0.5981, "step": 159, "time": 297.54 }, { "epoch": 0.21, "learning_rate": "9.9876e-06", "loss": 0.6209, "slid_loss": 0.5983, "step": 160, "time": 301.04 }, { "epoch": 0.21, "learning_rate": "9.9874e-06", "loss": 0.6104, "slid_loss": 0.5984, "step": 161, "time": 299.43 }, { "epoch": 0.21, "learning_rate": "9.9872e-06", "loss": 0.6082, "slid_loss": 0.5982, "step": 162, "time": 303.51 }, { "epoch": 0.21, "learning_rate": "9.9870e-06", "loss": 0.594, "slid_loss": 0.598, "step": 163, "time": 299.33 }, { "epoch": 0.21, "learning_rate": "9.9868e-06", "loss": 0.5984, "slid_loss": 0.5978, "step": 164, "time": 300.6 }, { "epoch": 0.21, "learning_rate": "9.9866e-06", "loss": 0.5662, "slid_loss": 0.5974, "step": 165, "time": 300.92 }, { "epoch": 0.21, "learning_rate": "9.9864e-06", "loss": 0.5915, "slid_loss": 0.5973, "step": 166, "time": 302.81 }, { "epoch": 0.22, "learning_rate": "9.9862e-06", "loss": 0.5758, "slid_loss": 0.5971, "step": 167, "time": 297.08 }, { "epoch": 0.22, "learning_rate": "9.9860e-06", "loss": 0.5816, "slid_loss": 0.5973, "step": 168, "time": 299.58 }, { "epoch": 0.22, "learning_rate": "9.9857e-06", "loss": 0.6093, "slid_loss": 0.5976, "step": 169, "time": 298.08 }, { "epoch": 0.22, "learning_rate": "9.9855e-06", "loss": 0.5916, "slid_loss": 0.5973, "step": 170, "time": 299.47 }, { "epoch": 0.22, "learning_rate": "9.9853e-06", "loss": 0.6068, "slid_loss": 0.5973, "step": 171, "time": 301.28 }, { "epoch": 0.22, "learning_rate": "9.9851e-06", "loss": 0.5746, "slid_loss": 0.5969, "step": 172, "time": 298.82 }, { "epoch": 0.22, "learning_rate": "9.9848e-06", "loss": 0.5839, "slid_loss": 0.5968, "step": 173, "time": 297.42 }, { "epoch": 0.23, "learning_rate": "9.9846e-06", "loss": 0.606, "slid_loss": 0.5969, "step": 174, "time": 300.27 }, { "epoch": 0.23, "learning_rate": "9.9844e-06", "loss": 0.5745, "slid_loss": 0.5966, "step": 175, "time": 299.0 }, { "epoch": 0.23, "learning_rate": "9.9842e-06", "loss": 0.5781, "slid_loss": 0.5965, "step": 176, "time": 301.89 }, { "epoch": 0.23, "learning_rate": "9.9839e-06", "loss": 0.6075, "slid_loss": 0.5966, "step": 177, "time": 302.45 }, { "epoch": 0.23, "learning_rate": "9.9837e-06", "loss": 0.5993, "slid_loss": 0.5966, "step": 178, "time": 303.55 }, { "epoch": 0.23, "learning_rate": "9.9835e-06", "loss": 0.5941, "slid_loss": 0.5967, "step": 179, "time": 300.89 }, { "epoch": 0.23, "learning_rate": "9.9832e-06", "loss": 0.5923, "slid_loss": 0.5967, "step": 180, "time": 296.88 }, { "epoch": 0.23, "learning_rate": "9.9830e-06", "loss": 0.6073, "slid_loss": 0.5969, "step": 181, "time": 297.73 }, { "epoch": 0.24, "learning_rate": "9.9827e-06", "loss": 0.5899, "slid_loss": 0.5967, "step": 182, "time": 304.98 }, { "epoch": 0.24, "learning_rate": "9.9825e-06", "loss": 0.6225, "slid_loss": 0.5972, "step": 183, "time": 299.1 }, { "epoch": 0.24, "learning_rate": "9.9823e-06", "loss": 0.5727, "slid_loss": 0.5969, "step": 184, "time": 299.82 }, { "epoch": 0.24, "learning_rate": "9.9820e-06", "loss": 0.5644, "slid_loss": 0.5965, "step": 185, "time": 302.65 }, { "epoch": 0.24, "learning_rate": "9.9818e-06", "loss": 0.6012, "slid_loss": 0.5965, "step": 186, "time": 294.28 }, { "epoch": 0.24, "learning_rate": "9.9815e-06", "loss": 0.583, "slid_loss": 0.5963, "step": 187, "time": 300.57 }, { "epoch": 0.24, "learning_rate": "9.9813e-06", "loss": 0.6199, "slid_loss": 0.5963, "step": 188, "time": 300.21 }, { "epoch": 0.24, "learning_rate": "9.9810e-06", "loss": 0.5737, "slid_loss": 0.596, "step": 189, "time": 298.53 }, { "epoch": 0.25, "learning_rate": "9.9808e-06", "loss": 0.6084, "slid_loss": 0.5962, "step": 190, "time": 303.74 }, { "epoch": 0.25, "learning_rate": "9.9805e-06", "loss": 0.587, "slid_loss": 0.5961, "step": 191, "time": 303.28 }, { "epoch": 0.25, "learning_rate": "9.9802e-06", "loss": 0.5778, "slid_loss": 0.5958, "step": 192, "time": 296.74 }, { "epoch": 0.25, "learning_rate": "9.9800e-06", "loss": 0.5752, "slid_loss": 0.5953, "step": 193, "time": 301.16 }, { "epoch": 0.25, "learning_rate": "9.9797e-06", "loss": 0.6127, "slid_loss": 0.5955, "step": 194, "time": 296.39 }, { "epoch": 0.25, "learning_rate": "9.9795e-06", "loss": 0.5864, "slid_loss": 0.5953, "step": 195, "time": 298.05 }, { "epoch": 0.25, "learning_rate": "9.9792e-06", "loss": 0.5788, "slid_loss": 0.595, "step": 196, "time": 297.51 }, { "epoch": 0.26, "learning_rate": "9.9789e-06", "loss": 0.5977, "slid_loss": 0.5949, "step": 197, "time": 299.48 }, { "epoch": 0.26, "learning_rate": "9.9787e-06", "loss": 0.568, "slid_loss": 0.5946, "step": 198, "time": 303.2 }, { "epoch": 0.26, "learning_rate": "9.9784e-06", "loss": 0.5954, "slid_loss": 0.5945, "step": 199, "time": 298.52 }, { "epoch": 0.26, "learning_rate": "9.9781e-06", "loss": 0.618, "slid_loss": 0.5948, "step": 200, "time": 297.2 }, { "epoch": 0.26, "learning_rate": "9.9779e-06", "loss": 0.568, "slid_loss": 0.5945, "step": 201, "time": 2159.56 }, { "epoch": 0.26, "learning_rate": "9.9776e-06", "loss": 0.5864, "slid_loss": 0.594, "step": 202, "time": 304.84 }, { "epoch": 0.26, "learning_rate": "9.9773e-06", "loss": 0.5866, "slid_loss": 0.5937, "step": 203, "time": 301.84 }, { "epoch": 0.26, "learning_rate": "9.9770e-06", "loss": 0.633, "slid_loss": 0.5942, "step": 204, "time": 300.37 }, { "epoch": 0.27, "learning_rate": "9.9768e-06", "loss": 0.5984, "slid_loss": 0.5939, "step": 205, "time": 298.58 }, { "epoch": 0.27, "learning_rate": "9.9765e-06", "loss": 0.5859, "slid_loss": 0.5939, "step": 206, "time": 298.88 }, { "epoch": 0.27, "learning_rate": "9.9762e-06", "loss": 0.5895, "slid_loss": 0.594, "step": 207, "time": 302.29 }, { "epoch": 0.27, "learning_rate": "9.9759e-06", "loss": 0.5931, "slid_loss": 0.594, "step": 208, "time": 295.71 }, { "epoch": 0.27, "learning_rate": "9.9756e-06", "loss": 0.586, "slid_loss": 0.5939, "step": 209, "time": 297.38 }, { "epoch": 0.27, "learning_rate": "9.9753e-06", "loss": 0.5702, "slid_loss": 0.5937, "step": 210, "time": 292.88 }, { "epoch": 0.27, "learning_rate": "9.9750e-06", "loss": 0.5841, "slid_loss": 0.5933, "step": 211, "time": 302.19 }, { "epoch": 0.27, "learning_rate": "9.9748e-06", "loss": 0.5839, "slid_loss": 0.5932, "step": 212, "time": 295.71 }, { "epoch": 0.28, "learning_rate": "9.9745e-06", "loss": 0.6085, "slid_loss": 0.5932, "step": 213, "time": 301.35 }, { "epoch": 0.28, "learning_rate": "9.9742e-06", "loss": 0.5931, "slid_loss": 0.5932, "step": 214, "time": 297.9 }, { "epoch": 0.28, "learning_rate": "9.9739e-06", "loss": 0.6003, "slid_loss": 0.5933, "step": 215, "time": 305.13 }, { "epoch": 0.28, "learning_rate": "9.9736e-06", "loss": 0.5904, "slid_loss": 0.5934, "step": 216, "time": 299.72 }, { "epoch": 0.28, "learning_rate": "9.9733e-06", "loss": 0.5789, "slid_loss": 0.5931, "step": 217, "time": 294.25 }, { "epoch": 0.28, "learning_rate": "9.9730e-06", "loss": 0.5975, "slid_loss": 0.5932, "step": 218, "time": 300.49 }, { "epoch": 0.28, "learning_rate": "9.9727e-06", "loss": 0.5814, "slid_loss": 0.5931, "step": 219, "time": 300.88 }, { "epoch": 0.28, "learning_rate": "9.9724e-06", "loss": 0.6156, "slid_loss": 0.593, "step": 220, "time": 298.38 }, { "epoch": 0.29, "learning_rate": "9.9721e-06", "loss": 0.5958, "slid_loss": 0.593, "step": 221, "time": 295.82 }, { "epoch": 0.29, "learning_rate": "9.9718e-06", "loss": 0.5821, "slid_loss": 0.5928, "step": 222, "time": 297.92 }, { "epoch": 0.29, "learning_rate": "9.9714e-06", "loss": 0.6157, "slid_loss": 0.5929, "step": 223, "time": 301.05 }, { "epoch": 0.29, "learning_rate": "9.9711e-06", "loss": 0.5596, "slid_loss": 0.5924, "step": 224, "time": 300.03 }, { "epoch": 0.29, "learning_rate": "9.9708e-06", "loss": 0.6077, "slid_loss": 0.5928, "step": 225, "time": 302.72 }, { "epoch": 0.29, "learning_rate": "9.9705e-06", "loss": 0.6011, "slid_loss": 0.5929, "step": 226, "time": 300.68 }, { "epoch": 0.29, "learning_rate": "9.9702e-06", "loss": 0.5792, "slid_loss": 0.593, "step": 227, "time": 301.7 }, { "epoch": 0.3, "learning_rate": "9.9699e-06", "loss": 0.5819, "slid_loss": 0.5931, "step": 228, "time": 307.53 }, { "epoch": 0.3, "learning_rate": "9.9696e-06", "loss": 0.5668, "slid_loss": 0.5927, "step": 229, "time": 298.86 }, { "epoch": 0.3, "learning_rate": "9.9692e-06", "loss": 0.5833, "slid_loss": 0.5927, "step": 230, "time": 301.51 }, { "epoch": 0.3, "learning_rate": "9.9689e-06", "loss": 0.5911, "slid_loss": 0.5928, "step": 231, "time": 301.86 }, { "epoch": 0.3, "learning_rate": "9.9686e-06", "loss": 0.6039, "slid_loss": 0.593, "step": 232, "time": 301.91 }, { "epoch": 0.3, "learning_rate": "9.9683e-06", "loss": 0.6113, "slid_loss": 0.5929, "step": 233, "time": 299.99 }, { "epoch": 0.3, "learning_rate": "9.9679e-06", "loss": 0.5821, "slid_loss": 0.5926, "step": 234, "time": 300.63 }, { "epoch": 0.3, "learning_rate": "9.9676e-06", "loss": 0.584, "slid_loss": 0.5926, "step": 235, "time": 294.85 }, { "epoch": 0.31, "learning_rate": "9.9673e-06", "loss": 0.593, "slid_loss": 0.5925, "step": 236, "time": 300.63 }, { "epoch": 0.31, "learning_rate": "9.9669e-06", "loss": 0.5875, "slid_loss": 0.5925, "step": 237, "time": 301.49 }, { "epoch": 0.31, "learning_rate": "9.9666e-06", "loss": 0.6002, "slid_loss": 0.5925, "step": 238, "time": 300.88 }, { "epoch": 0.31, "learning_rate": "9.9663e-06", "loss": 0.5786, "slid_loss": 0.5922, "step": 239, "time": 300.01 }, { "epoch": 0.31, "learning_rate": "9.9659e-06", "loss": 0.596, "slid_loss": 0.5923, "step": 240, "time": 299.06 }, { "epoch": 0.31, "learning_rate": "9.9656e-06", "loss": 0.5881, "slid_loss": 0.5922, "step": 241, "time": 398.92 }, { "epoch": 0.31, "learning_rate": "9.9653e-06", "loss": 0.5988, "slid_loss": 0.5921, "step": 242, "time": 512.79 }, { "epoch": 0.31, "learning_rate": "9.9649e-06", "loss": 0.5969, "slid_loss": 0.5922, "step": 243, "time": 333.47 }, { "epoch": 0.32, "learning_rate": "9.9646e-06", "loss": 0.58, "slid_loss": 0.5923, "step": 244, "time": 303.21 }, { "epoch": 0.32, "learning_rate": "9.9642e-06", "loss": 0.5971, "slid_loss": 0.5924, "step": 245, "time": 296.87 }, { "epoch": 0.32, "learning_rate": "9.9639e-06", "loss": 0.5809, "slid_loss": 0.5924, "step": 246, "time": 303.53 }, { "epoch": 0.32, "learning_rate": "9.9635e-06", "loss": 0.6039, "slid_loss": 0.5924, "step": 247, "time": 304.38 }, { "epoch": 0.32, "learning_rate": "9.9632e-06", "loss": 0.6062, "slid_loss": 0.5923, "step": 248, "time": 299.2 }, { "epoch": 0.32, "learning_rate": "9.9628e-06", "loss": 0.5842, "slid_loss": 0.5923, "step": 249, "time": 295.64 }, { "epoch": 0.32, "learning_rate": "9.9625e-06", "loss": 0.6137, "slid_loss": 0.5923, "step": 250, "time": 300.97 }, { "epoch": 0.33, "learning_rate": "9.9621e-06", "loss": 0.5899, "slid_loss": 0.5921, "step": 251, "time": 304.22 }, { "epoch": 0.33, "learning_rate": "9.9618e-06", "loss": 0.5839, "slid_loss": 0.592, "step": 252, "time": 300.56 }, { "epoch": 0.33, "learning_rate": "9.9614e-06", "loss": 0.6023, "slid_loss": 0.5921, "step": 253, "time": 302.46 }, { "epoch": 0.33, "learning_rate": "9.9610e-06", "loss": 0.6089, "slid_loss": 0.5923, "step": 254, "time": 300.0 }, { "epoch": 0.33, "learning_rate": "9.9607e-06", "loss": 0.6055, "slid_loss": 0.5924, "step": 255, "time": 300.65 }, { "epoch": 0.33, "learning_rate": "9.9603e-06", "loss": 0.5927, "slid_loss": 0.5923, "step": 256, "time": 296.14 }, { "epoch": 0.33, "learning_rate": "9.9599e-06", "loss": 0.5967, "slid_loss": 0.5925, "step": 257, "time": 294.8 }, { "epoch": 0.33, "learning_rate": "9.9596e-06", "loss": 0.5717, "slid_loss": 0.5922, "step": 258, "time": 303.38 }, { "epoch": 0.34, "learning_rate": "9.9592e-06", "loss": 0.5853, "slid_loss": 0.5922, "step": 259, "time": 301.95 }, { "epoch": 0.34, "learning_rate": "9.9588e-06", "loss": 0.5944, "slid_loss": 0.592, "step": 260, "time": 300.31 }, { "epoch": 0.34, "learning_rate": "9.9585e-06", "loss": 0.6069, "slid_loss": 0.5919, "step": 261, "time": 303.26 }, { "epoch": 0.34, "learning_rate": "9.9581e-06", "loss": 0.5901, "slid_loss": 0.5917, "step": 262, "time": 301.92 }, { "epoch": 0.34, "learning_rate": "9.9577e-06", "loss": 0.604, "slid_loss": 0.5918, "step": 263, "time": 302.96 }, { "epoch": 0.34, "learning_rate": "9.9573e-06", "loss": 0.5833, "slid_loss": 0.5917, "step": 264, "time": 301.58 }, { "epoch": 0.34, "learning_rate": "9.9570e-06", "loss": 0.5949, "slid_loss": 0.592, "step": 265, "time": 301.0 }, { "epoch": 0.34, "learning_rate": "9.9566e-06", "loss": 0.5695, "slid_loss": 0.5918, "step": 266, "time": 300.87 }, { "epoch": 0.35, "learning_rate": "9.9562e-06", "loss": 0.5978, "slid_loss": 0.592, "step": 267, "time": 299.95 }, { "epoch": 0.35, "learning_rate": "9.9558e-06", "loss": 0.5796, "slid_loss": 0.592, "step": 268, "time": 293.55 }, { "epoch": 0.35, "learning_rate": "9.9554e-06", "loss": 0.6032, "slid_loss": 0.5919, "step": 269, "time": 302.36 }, { "epoch": 0.35, "learning_rate": "9.9550e-06", "loss": 0.6002, "slid_loss": 0.592, "step": 270, "time": 303.01 }, { "epoch": 0.35, "learning_rate": "9.9547e-06", "loss": 0.6025, "slid_loss": 0.5919, "step": 271, "time": 303.25 }, { "epoch": 0.35, "learning_rate": "9.9543e-06", "loss": 0.6, "slid_loss": 0.5922, "step": 272, "time": 303.51 }, { "epoch": 0.35, "learning_rate": "9.9539e-06", "loss": 0.5961, "slid_loss": 0.5923, "step": 273, "time": 300.67 }, { "epoch": 0.35, "learning_rate": "9.9535e-06", "loss": 0.5879, "slid_loss": 0.5921, "step": 274, "time": 298.22 }, { "epoch": 0.36, "learning_rate": "9.9531e-06", "loss": 0.5915, "slid_loss": 0.5923, "step": 275, "time": 294.33 }, { "epoch": 0.36, "learning_rate": "9.9527e-06", "loss": 0.5785, "slid_loss": 0.5923, "step": 276, "time": 303.84 }, { "epoch": 0.36, "learning_rate": "9.9523e-06", "loss": 0.5918, "slid_loss": 0.5922, "step": 277, "time": 301.67 }, { "epoch": 0.36, "learning_rate": "9.9519e-06", "loss": 0.5767, "slid_loss": 0.5919, "step": 278, "time": 298.92 }, { "epoch": 0.36, "learning_rate": "9.9515e-06", "loss": 0.5884, "slid_loss": 0.5919, "step": 279, "time": 301.91 }, { "epoch": 0.36, "learning_rate": "9.9511e-06", "loss": 0.5535, "slid_loss": 0.5915, "step": 280, "time": 305.25 }, { "epoch": 0.36, "learning_rate": "9.9507e-06", "loss": 0.5793, "slid_loss": 0.5912, "step": 281, "time": 304.08 }, { "epoch": 0.37, "learning_rate": "9.9503e-06", "loss": 0.6179, "slid_loss": 0.5915, "step": 282, "time": 302.88 }, { "epoch": 0.37, "learning_rate": "9.9499e-06", "loss": 0.5902, "slid_loss": 0.5912, "step": 283, "time": 304.51 }, { "epoch": 0.37, "learning_rate": "9.9495e-06", "loss": 0.607, "slid_loss": 0.5915, "step": 284, "time": 303.7 }, { "epoch": 0.37, "learning_rate": "9.9490e-06", "loss": 0.5717, "slid_loss": 0.5916, "step": 285, "time": 293.89 }, { "epoch": 0.37, "learning_rate": "9.9486e-06", "loss": 0.5917, "slid_loss": 0.5915, "step": 286, "time": 294.93 }, { "epoch": 0.37, "learning_rate": "9.9482e-06", "loss": 0.6037, "slid_loss": 0.5917, "step": 287, "time": 292.8 }, { "epoch": 0.37, "learning_rate": "9.9478e-06", "loss": 0.5852, "slid_loss": 0.5913, "step": 288, "time": 305.96 }, { "epoch": 0.37, "learning_rate": "9.9474e-06", "loss": 0.6088, "slid_loss": 0.5917, "step": 289, "time": 300.0 }, { "epoch": 0.38, "learning_rate": "9.9470e-06", "loss": 0.589, "slid_loss": 0.5915, "step": 290, "time": 298.64 }, { "epoch": 0.38, "learning_rate": "9.9465e-06", "loss": 0.5874, "slid_loss": 0.5915, "step": 291, "time": 299.51 }, { "epoch": 0.38, "learning_rate": "9.9461e-06", "loss": 0.6055, "slid_loss": 0.5918, "step": 292, "time": 303.2 }, { "epoch": 0.38, "learning_rate": "9.9457e-06", "loss": 0.5701, "slid_loss": 0.5917, "step": 293, "time": 306.15 }, { "epoch": 0.38, "learning_rate": "9.9453e-06", "loss": 0.551, "slid_loss": 0.5911, "step": 294, "time": 296.03 }, { "epoch": 0.38, "learning_rate": "9.9448e-06", "loss": 0.5914, "slid_loss": 0.5912, "step": 295, "time": 299.9 }, { "epoch": 0.38, "learning_rate": "9.9444e-06", "loss": 0.6068, "slid_loss": 0.5914, "step": 296, "time": 303.06 }, { "epoch": 0.38, "learning_rate": "9.9440e-06", "loss": 0.5881, "slid_loss": 0.5914, "step": 297, "time": 302.97 }, { "epoch": 0.39, "learning_rate": "9.9435e-06", "loss": 0.5652, "slid_loss": 0.5913, "step": 298, "time": 299.7 }, { "epoch": 0.39, "learning_rate": "9.9431e-06", "loss": 0.5771, "slid_loss": 0.5911, "step": 299, "time": 297.81 }, { "epoch": 0.39, "learning_rate": "9.9427e-06", "loss": 0.5984, "slid_loss": 0.5909, "step": 300, "time": 309.93 }, { "epoch": 0.39, "learning_rate": "9.9422e-06", "loss": 0.5798, "slid_loss": 0.5911, "step": 301, "time": 2145.71 }, { "epoch": 0.39, "learning_rate": "9.9418e-06", "loss": 0.5467, "slid_loss": 0.5907, "step": 302, "time": 304.18 }, { "epoch": 0.39, "learning_rate": "9.9413e-06", "loss": 0.6062, "slid_loss": 0.5909, "step": 303, "time": 303.3 }, { "epoch": 0.39, "learning_rate": "9.9409e-06", "loss": 0.5981, "slid_loss": 0.5905, "step": 304, "time": 301.71 }, { "epoch": 0.39, "learning_rate": "9.9404e-06", "loss": 0.5806, "slid_loss": 0.5903, "step": 305, "time": 298.37 }, { "epoch": 0.4, "learning_rate": "9.9400e-06", "loss": 0.6079, "slid_loss": 0.5906, "step": 306, "time": 297.14 }, { "epoch": 0.4, "learning_rate": "9.9396e-06", "loss": 0.5935, "slid_loss": 0.5906, "step": 307, "time": 302.82 }, { "epoch": 0.4, "learning_rate": "9.9391e-06", "loss": 0.561, "slid_loss": 0.5903, "step": 308, "time": 298.85 }, { "epoch": 0.4, "learning_rate": "9.9387e-06", "loss": 0.603, "slid_loss": 0.5904, "step": 309, "time": 298.61 }, { "epoch": 0.4, "learning_rate": "9.9382e-06", "loss": 0.5999, "slid_loss": 0.5907, "step": 310, "time": 301.25 }, { "epoch": 0.4, "learning_rate": "9.9377e-06", "loss": 0.5995, "slid_loss": 0.5909, "step": 311, "time": 299.59 }, { "epoch": 0.4, "learning_rate": "9.9373e-06", "loss": 0.5712, "slid_loss": 0.5908, "step": 312, "time": 302.55 }, { "epoch": 0.41, "learning_rate": "9.9368e-06", "loss": 0.5979, "slid_loss": 0.5907, "step": 313, "time": 298.82 }, { "epoch": 0.41, "learning_rate": "9.9364e-06", "loss": 0.5781, "slid_loss": 0.5905, "step": 314, "time": 296.46 }, { "epoch": 0.41, "learning_rate": "9.9359e-06", "loss": 0.5776, "slid_loss": 0.5903, "step": 315, "time": 299.03 }, { "epoch": 0.41, "learning_rate": "9.9354e-06", "loss": 0.575, "slid_loss": 0.5901, "step": 316, "time": 298.66 }, { "epoch": 0.41, "learning_rate": "9.9350e-06", "loss": 0.5841, "slid_loss": 0.5902, "step": 317, "time": 299.26 }, { "epoch": 0.41, "learning_rate": "9.9345e-06", "loss": 0.5874, "slid_loss": 0.5901, "step": 318, "time": 298.97 }, { "epoch": 0.41, "learning_rate": "9.9340e-06", "loss": 0.5836, "slid_loss": 0.5901, "step": 319, "time": 301.89 }, { "epoch": 0.41, "learning_rate": "9.9336e-06", "loss": 0.5722, "slid_loss": 0.5897, "step": 320, "time": 301.23 }, { "epoch": 0.42, "learning_rate": "9.9331e-06", "loss": 0.5815, "slid_loss": 0.5895, "step": 321, "time": 300.73 }, { "epoch": 0.42, "learning_rate": "9.9326e-06", "loss": 0.582, "slid_loss": 0.5895, "step": 322, "time": 306.95 }, { "epoch": 0.42, "learning_rate": "9.9322e-06", "loss": 0.5925, "slid_loss": 0.5893, "step": 323, "time": 303.77 }, { "epoch": 0.42, "learning_rate": "9.9317e-06", "loss": 0.5637, "slid_loss": 0.5893, "step": 324, "time": 302.23 }, { "epoch": 0.42, "learning_rate": "9.9312e-06", "loss": 0.5697, "slid_loss": 0.589, "step": 325, "time": 300.31 }, { "epoch": 0.42, "learning_rate": "9.9307e-06", "loss": 0.5983, "slid_loss": 0.5889, "step": 326, "time": 304.98 }, { "epoch": 0.42, "learning_rate": "9.9302e-06", "loss": 0.5884, "slid_loss": 0.589, "step": 327, "time": 295.6 }, { "epoch": 0.42, "learning_rate": "9.9298e-06", "loss": 0.5902, "slid_loss": 0.5891, "step": 328, "time": 300.44 }, { "epoch": 0.43, "learning_rate": "9.9293e-06", "loss": 0.5706, "slid_loss": 0.5891, "step": 329, "time": 299.45 }, { "epoch": 0.43, "learning_rate": "9.9288e-06", "loss": 0.5751, "slid_loss": 0.5891, "step": 330, "time": 300.15 }, { "epoch": 0.43, "learning_rate": "9.9283e-06", "loss": 0.5948, "slid_loss": 0.5891, "step": 331, "time": 301.39 }, { "epoch": 0.43, "learning_rate": "9.9278e-06", "loss": 0.585, "slid_loss": 0.5889, "step": 332, "time": 300.47 }, { "epoch": 0.43, "learning_rate": "9.9273e-06", "loss": 0.598, "slid_loss": 0.5888, "step": 333, "time": 301.51 }, { "epoch": 0.43, "learning_rate": "9.9268e-06", "loss": 0.5929, "slid_loss": 0.5889, "step": 334, "time": 301.08 }, { "epoch": 0.43, "learning_rate": "9.9263e-06", "loss": 0.6013, "slid_loss": 0.5891, "step": 335, "time": 301.25 }, { "epoch": 0.44, "learning_rate": "9.9258e-06", "loss": 0.5635, "slid_loss": 0.5888, "step": 336, "time": 302.84 }, { "epoch": 0.44, "learning_rate": "9.9253e-06", "loss": 0.583, "slid_loss": 0.5887, "step": 337, "time": 302.95 }, { "epoch": 0.44, "learning_rate": "9.9248e-06", "loss": 0.5615, "slid_loss": 0.5883, "step": 338, "time": 304.5 }, { "epoch": 0.44, "learning_rate": "9.9243e-06", "loss": 0.5832, "slid_loss": 0.5884, "step": 339, "time": 301.31 }, { "epoch": 0.44, "learning_rate": "9.9238e-06", "loss": 0.5584, "slid_loss": 0.588, "step": 340, "time": 301.03 }, { "epoch": 0.44, "learning_rate": "9.9233e-06", "loss": 0.5838, "slid_loss": 0.588, "step": 341, "time": 300.61 }, { "epoch": 0.44, "learning_rate": "9.9228e-06", "loss": 0.5854, "slid_loss": 0.5878, "step": 342, "time": 304.07 }, { "epoch": 0.44, "learning_rate": "9.9223e-06", "loss": 0.5775, "slid_loss": 0.5876, "step": 343, "time": 304.08 }, { "epoch": 0.45, "learning_rate": "9.9218e-06", "loss": 0.587, "slid_loss": 0.5877, "step": 344, "time": 301.56 }, { "epoch": 0.45, "learning_rate": "9.9213e-06", "loss": 0.5812, "slid_loss": 0.5875, "step": 345, "time": 297.96 }, { "epoch": 0.45, "learning_rate": "9.9208e-06", "loss": 0.5783, "slid_loss": 0.5875, "step": 346, "time": 302.47 }, { "epoch": 0.45, "learning_rate": "9.9203e-06", "loss": 0.5731, "slid_loss": 0.5872, "step": 347, "time": 297.21 }, { "epoch": 0.45, "learning_rate": "9.9198e-06", "loss": 0.5725, "slid_loss": 0.5869, "step": 348, "time": 300.15 }, { "epoch": 0.45, "learning_rate": "9.9192e-06", "loss": 0.5565, "slid_loss": 0.5866, "step": 349, "time": 299.24 }, { "epoch": 0.45, "learning_rate": "9.9187e-06", "loss": 0.6033, "slid_loss": 0.5865, "step": 350, "time": 297.98 }, { "epoch": 0.45, "learning_rate": "9.9182e-06", "loss": 0.5588, "slid_loss": 0.5862, "step": 351, "time": 298.4 }, { "epoch": 0.46, "learning_rate": "9.9177e-06", "loss": 0.5595, "slid_loss": 0.5859, "step": 352, "time": 300.12 }, { "epoch": 0.46, "learning_rate": "9.9171e-06", "loss": 0.5777, "slid_loss": 0.5857, "step": 353, "time": 293.74 }, { "epoch": 0.46, "learning_rate": "9.9166e-06", "loss": 0.587, "slid_loss": 0.5855, "step": 354, "time": 300.09 }, { "epoch": 0.46, "learning_rate": "9.9161e-06", "loss": 0.5791, "slid_loss": 0.5852, "step": 355, "time": 299.74 }, { "epoch": 0.46, "learning_rate": "9.9156e-06", "loss": 0.5859, "slid_loss": 0.5851, "step": 356, "time": 299.89 }, { "epoch": 0.46, "learning_rate": "9.9150e-06", "loss": 0.5808, "slid_loss": 0.585, "step": 357, "time": 299.12 }, { "epoch": 0.46, "learning_rate": "9.9145e-06", "loss": 0.5901, "slid_loss": 0.5852, "step": 358, "time": 301.57 }, { "epoch": 0.46, "learning_rate": "9.9140e-06", "loss": 0.5809, "slid_loss": 0.5851, "step": 359, "time": 302.98 }, { "epoch": 0.47, "learning_rate": "9.9134e-06", "loss": 0.5859, "slid_loss": 0.585, "step": 360, "time": 296.56 }, { "epoch": 0.47, "learning_rate": "9.9129e-06", "loss": 0.592, "slid_loss": 0.5849, "step": 361, "time": 309.77 }, { "epoch": 0.47, "learning_rate": "9.9124e-06", "loss": 0.5672, "slid_loss": 0.5847, "step": 362, "time": 442.06 }, { "epoch": 0.47, "learning_rate": "9.9118e-06", "loss": 0.5837, "slid_loss": 0.5844, "step": 363, "time": 543.81 }, { "epoch": 0.47, "learning_rate": "9.9113e-06", "loss": 0.5883, "slid_loss": 0.5845, "step": 364, "time": 307.58 }, { "epoch": 0.47, "learning_rate": "9.9107e-06", "loss": 0.5914, "slid_loss": 0.5845, "step": 365, "time": 302.4 }, { "epoch": 0.47, "learning_rate": "9.9102e-06", "loss": 0.5901, "slid_loss": 0.5847, "step": 366, "time": 304.61 }, { "epoch": 0.48, "learning_rate": "9.9096e-06", "loss": 0.6143, "slid_loss": 0.5848, "step": 367, "time": 301.14 }, { "epoch": 0.48, "learning_rate": "9.9091e-06", "loss": 0.5647, "slid_loss": 0.5847, "step": 368, "time": 304.0 }, { "epoch": 0.48, "learning_rate": "9.9085e-06", "loss": 0.5874, "slid_loss": 0.5845, "step": 369, "time": 299.09 }, { "epoch": 0.48, "learning_rate": "9.9080e-06", "loss": 0.5769, "slid_loss": 0.5843, "step": 370, "time": 300.46 }, { "epoch": 0.48, "learning_rate": "9.9074e-06", "loss": 0.5568, "slid_loss": 0.5838, "step": 371, "time": 299.6 }, { "epoch": 0.48, "learning_rate": "9.9069e-06", "loss": 0.5953, "slid_loss": 0.5838, "step": 372, "time": 303.64 }, { "epoch": 0.48, "learning_rate": "9.9063e-06", "loss": 0.5935, "slid_loss": 0.5838, "step": 373, "time": 302.9 }, { "epoch": 0.48, "learning_rate": "9.9058e-06", "loss": 0.5781, "slid_loss": 0.5837, "step": 374, "time": 303.66 }, { "epoch": 0.49, "learning_rate": "9.9052e-06", "loss": 0.5954, "slid_loss": 0.5837, "step": 375, "time": 301.25 }, { "epoch": 0.49, "learning_rate": "9.9046e-06", "loss": 0.5562, "slid_loss": 0.5835, "step": 376, "time": 302.16 }, { "epoch": 0.49, "learning_rate": "9.9041e-06", "loss": 0.5978, "slid_loss": 0.5835, "step": 377, "time": 301.0 }, { "epoch": 0.49, "learning_rate": "9.9035e-06", "loss": 0.5904, "slid_loss": 0.5837, "step": 378, "time": 301.57 }, { "epoch": 0.49, "learning_rate": "9.9030e-06", "loss": 0.61, "slid_loss": 0.5839, "step": 379, "time": 298.34 }, { "epoch": 0.49, "learning_rate": "9.9024e-06", "loss": 0.5824, "slid_loss": 0.5842, "step": 380, "time": 298.45 }, { "epoch": 0.49, "learning_rate": "9.9018e-06", "loss": 0.5803, "slid_loss": 0.5842, "step": 381, "time": 299.83 }, { "epoch": 0.49, "learning_rate": "9.9012e-06", "loss": 0.6062, "slid_loss": 0.5841, "step": 382, "time": 302.79 }, { "epoch": 0.5, "learning_rate": "9.9007e-06", "loss": 0.5783, "slid_loss": 0.584, "step": 383, "time": 304.89 }, { "epoch": 0.5, "learning_rate": "9.9001e-06", "loss": 0.5873, "slid_loss": 0.5838, "step": 384, "time": 299.4 }, { "epoch": 0.5, "learning_rate": "9.8995e-06", "loss": 0.5714, "slid_loss": 0.5838, "step": 385, "time": 302.76 }, { "epoch": 0.5, "learning_rate": "9.8989e-06", "loss": 0.5849, "slid_loss": 0.5837, "step": 386, "time": 298.68 }, { "epoch": 0.5, "learning_rate": "9.8984e-06", "loss": 0.5512, "slid_loss": 0.5832, "step": 387, "time": 302.47 }, { "epoch": 0.5, "learning_rate": "9.8978e-06", "loss": 0.5851, "slid_loss": 0.5832, "step": 388, "time": 300.3 }, { "epoch": 0.5, "learning_rate": "9.8972e-06", "loss": 0.5749, "slid_loss": 0.5828, "step": 389, "time": 303.56 }, { "epoch": 0.51, "learning_rate": "9.8966e-06", "loss": 0.5559, "slid_loss": 0.5825, "step": 390, "time": 303.89 }, { "epoch": 0.51, "learning_rate": "9.8960e-06", "loss": 0.5666, "slid_loss": 0.5823, "step": 391, "time": 298.94 }, { "epoch": 0.51, "learning_rate": "9.8954e-06", "loss": 0.5939, "slid_loss": 0.5822, "step": 392, "time": 299.49 }, { "epoch": 0.51, "learning_rate": "9.8949e-06", "loss": 0.5914, "slid_loss": 0.5824, "step": 393, "time": 300.26 }, { "epoch": 0.51, "learning_rate": "9.8943e-06", "loss": 0.5602, "slid_loss": 0.5825, "step": 394, "time": 301.06 }, { "epoch": 0.51, "learning_rate": "9.8937e-06", "loss": 0.5878, "slid_loss": 0.5824, "step": 395, "time": 298.55 }, { "epoch": 0.51, "learning_rate": "9.8931e-06", "loss": 0.5964, "slid_loss": 0.5823, "step": 396, "time": 297.95 }, { "epoch": 0.51, "learning_rate": "9.8925e-06", "loss": 0.6124, "slid_loss": 0.5826, "step": 397, "time": 302.23 }, { "epoch": 0.52, "learning_rate": "9.8919e-06", "loss": 0.5672, "slid_loss": 0.5826, "step": 398, "time": 298.67 }, { "epoch": 0.52, "learning_rate": "9.8913e-06", "loss": 0.5678, "slid_loss": 0.5825, "step": 399, "time": 296.19 }, { "epoch": 0.52, "learning_rate": "9.8907e-06", "loss": 0.5952, "slid_loss": 0.5825, "step": 400, "time": 294.41 }, { "epoch": 0.52, "learning_rate": "9.8901e-06", "loss": 0.5817, "slid_loss": 0.5825, "step": 401, "time": 2131.8 }, { "epoch": 0.52, "learning_rate": "9.8895e-06", "loss": 0.5577, "slid_loss": 0.5826, "step": 402, "time": 303.45 }, { "epoch": 0.52, "learning_rate": "9.8889e-06", "loss": 0.5867, "slid_loss": 0.5824, "step": 403, "time": 303.95 }, { "epoch": 0.52, "learning_rate": "9.8883e-06", "loss": 0.5767, "slid_loss": 0.5822, "step": 404, "time": 299.6 }, { "epoch": 0.52, "learning_rate": "9.8877e-06", "loss": 0.5631, "slid_loss": 0.582, "step": 405, "time": 296.97 }, { "epoch": 0.53, "learning_rate": "9.8870e-06", "loss": 0.5671, "slid_loss": 0.5816, "step": 406, "time": 296.01 }, { "epoch": 0.53, "learning_rate": "9.8864e-06", "loss": 0.5607, "slid_loss": 0.5813, "step": 407, "time": 292.73 }, { "epoch": 0.53, "learning_rate": "9.8858e-06", "loss": 0.5612, "slid_loss": 0.5813, "step": 408, "time": 299.45 }, { "epoch": 0.53, "learning_rate": "9.8852e-06", "loss": 0.5591, "slid_loss": 0.5808, "step": 409, "time": 296.08 }, { "epoch": 0.53, "learning_rate": "9.8846e-06", "loss": 0.5762, "slid_loss": 0.5806, "step": 410, "time": 299.42 }, { "epoch": 0.53, "learning_rate": "9.8840e-06", "loss": 0.5762, "slid_loss": 0.5804, "step": 411, "time": 302.72 }, { "epoch": 0.53, "learning_rate": "9.8834e-06", "loss": 0.5675, "slid_loss": 0.5803, "step": 412, "time": 304.89 }, { "epoch": 0.53, "learning_rate": "9.8827e-06", "loss": 0.5656, "slid_loss": 0.58, "step": 413, "time": 300.11 }, { "epoch": 0.54, "learning_rate": "9.8821e-06", "loss": 0.599, "slid_loss": 0.5802, "step": 414, "time": 300.68 }, { "epoch": 0.54, "learning_rate": "9.8815e-06", "loss": 0.5735, "slid_loss": 0.5802, "step": 415, "time": 300.05 }, { "epoch": 0.54, "learning_rate": "9.8809e-06", "loss": 0.5825, "slid_loss": 0.5803, "step": 416, "time": 300.83 }, { "epoch": 0.54, "learning_rate": "9.8802e-06", "loss": 0.5733, "slid_loss": 0.5802, "step": 417, "time": 298.88 }, { "epoch": 0.54, "learning_rate": "9.8796e-06", "loss": 0.5703, "slid_loss": 0.58, "step": 418, "time": 305.66 }, { "epoch": 0.54, "learning_rate": "9.8790e-06", "loss": 0.5645, "slid_loss": 0.5798, "step": 419, "time": 300.94 }, { "epoch": 0.54, "learning_rate": "9.8783e-06", "loss": 0.5623, "slid_loss": 0.5797, "step": 420, "time": 299.63 }, { "epoch": 0.55, "learning_rate": "9.8777e-06", "loss": 0.5444, "slid_loss": 0.5793, "step": 421, "time": 298.09 }, { "epoch": 0.55, "learning_rate": "9.8771e-06", "loss": 0.5864, "slid_loss": 0.5794, "step": 422, "time": 297.47 }, { "epoch": 0.55, "learning_rate": "9.8764e-06", "loss": 0.6055, "slid_loss": 0.5795, "step": 423, "time": 304.79 }, { "epoch": 0.55, "learning_rate": "9.8758e-06", "loss": 0.57, "slid_loss": 0.5796, "step": 424, "time": 301.25 }, { "epoch": 0.55, "learning_rate": "9.8752e-06", "loss": 0.5871, "slid_loss": 0.5797, "step": 425, "time": 298.86 }, { "epoch": 0.55, "learning_rate": "9.8745e-06", "loss": 0.5809, "slid_loss": 0.5796, "step": 426, "time": 301.44 }, { "epoch": 0.55, "learning_rate": "9.8739e-06", "loss": 0.5766, "slid_loss": 0.5794, "step": 427, "time": 302.51 }, { "epoch": 0.55, "learning_rate": "9.8732e-06", "loss": 0.5997, "slid_loss": 0.5795, "step": 428, "time": 295.31 }, { "epoch": 0.56, "learning_rate": "9.8726e-06", "loss": 0.5724, "slid_loss": 0.5796, "step": 429, "time": 304.51 }, { "epoch": 0.56, "learning_rate": "9.8719e-06", "loss": 0.5924, "slid_loss": 0.5797, "step": 430, "time": 300.1 }, { "epoch": 0.56, "learning_rate": "9.8713e-06", "loss": 0.611, "slid_loss": 0.5799, "step": 431, "time": 302.85 }, { "epoch": 0.56, "learning_rate": "9.8706e-06", "loss": 0.5887, "slid_loss": 0.5799, "step": 432, "time": 300.26 }, { "epoch": 0.56, "learning_rate": "9.8700e-06", "loss": 0.5958, "slid_loss": 0.5799, "step": 433, "time": 297.76 }, { "epoch": 0.56, "learning_rate": "9.8693e-06", "loss": 0.5765, "slid_loss": 0.5797, "step": 434, "time": 301.87 }, { "epoch": 0.56, "learning_rate": "9.8687e-06", "loss": 0.5859, "slid_loss": 0.5796, "step": 435, "time": 303.1 }, { "epoch": 0.56, "learning_rate": "9.8680e-06", "loss": 0.5864, "slid_loss": 0.5798, "step": 436, "time": 301.61 }, { "epoch": 0.57, "learning_rate": "9.8673e-06", "loss": 0.5822, "slid_loss": 0.5798, "step": 437, "time": 302.85 }, { "epoch": 0.57, "learning_rate": "9.8667e-06", "loss": 0.5707, "slid_loss": 0.5799, "step": 438, "time": 301.6 }, { "epoch": 0.57, "learning_rate": "9.8660e-06", "loss": 0.5834, "slid_loss": 0.5799, "step": 439, "time": 303.64 }, { "epoch": 0.57, "learning_rate": "9.8653e-06", "loss": 0.575, "slid_loss": 0.5801, "step": 440, "time": 300.26 }, { "epoch": 0.57, "learning_rate": "9.8647e-06", "loss": 0.5684, "slid_loss": 0.5799, "step": 441, "time": 298.04 }, { "epoch": 0.57, "learning_rate": "9.8640e-06", "loss": 0.5908, "slid_loss": 0.58, "step": 442, "time": 299.82 }, { "epoch": 0.57, "learning_rate": "9.8633e-06", "loss": 0.6021, "slid_loss": 0.5802, "step": 443, "time": 299.4 }, { "epoch": 0.57, "learning_rate": "9.8627e-06", "loss": 0.5846, "slid_loss": 0.5802, "step": 444, "time": 302.43 }, { "epoch": 0.58, "learning_rate": "9.8620e-06", "loss": 0.5923, "slid_loss": 0.5803, "step": 445, "time": 303.88 }, { "epoch": 0.58, "learning_rate": "9.8613e-06", "loss": 0.5359, "slid_loss": 0.5799, "step": 446, "time": 298.65 }, { "epoch": 0.58, "learning_rate": "9.8607e-06", "loss": 0.5764, "slid_loss": 0.5799, "step": 447, "time": 300.04 }, { "epoch": 0.58, "learning_rate": "9.8600e-06", "loss": 0.5913, "slid_loss": 0.5801, "step": 448, "time": 303.88 }, { "epoch": 0.58, "learning_rate": "9.8593e-06", "loss": 0.5527, "slid_loss": 0.5801, "step": 449, "time": 301.12 }, { "epoch": 0.58, "learning_rate": "9.8586e-06", "loss": 0.563, "slid_loss": 0.5797, "step": 450, "time": 298.58 }, { "epoch": 0.58, "learning_rate": "9.8579e-06", "loss": 0.5561, "slid_loss": 0.5796, "step": 451, "time": 299.14 }, { "epoch": 0.59, "learning_rate": "9.8572e-06", "loss": 0.5594, "slid_loss": 0.5796, "step": 452, "time": 299.44 }, { "epoch": 0.59, "learning_rate": "9.8566e-06", "loss": 0.5778, "slid_loss": 0.5796, "step": 453, "time": 302.98 }, { "epoch": 0.59, "learning_rate": "9.8559e-06", "loss": 0.5815, "slid_loss": 0.5796, "step": 454, "time": 298.23 }, { "epoch": 0.59, "learning_rate": "9.8552e-06", "loss": 0.5589, "slid_loss": 0.5794, "step": 455, "time": 300.7 }, { "epoch": 0.59, "learning_rate": "9.8545e-06", "loss": 0.5716, "slid_loss": 0.5792, "step": 456, "time": 299.94 }, { "epoch": 0.59, "learning_rate": "9.8538e-06", "loss": 0.5927, "slid_loss": 0.5793, "step": 457, "time": 302.7 }, { "epoch": 0.59, "learning_rate": "9.8531e-06", "loss": 0.5675, "slid_loss": 0.5791, "step": 458, "time": 297.41 }, { "epoch": 0.59, "learning_rate": "9.8524e-06", "loss": 0.5934, "slid_loss": 0.5792, "step": 459, "time": 302.7 }, { "epoch": 0.6, "learning_rate": "9.8517e-06", "loss": 0.6052, "slid_loss": 0.5794, "step": 460, "time": 300.68 }, { "epoch": 0.6, "learning_rate": "9.8510e-06", "loss": 0.5644, "slid_loss": 0.5792, "step": 461, "time": 295.54 }, { "epoch": 0.6, "learning_rate": "9.8503e-06", "loss": 0.562, "slid_loss": 0.5791, "step": 462, "time": 298.32 }, { "epoch": 0.6, "learning_rate": "9.8496e-06", "loss": 0.5411, "slid_loss": 0.5787, "step": 463, "time": 303.31 }, { "epoch": 0.6, "learning_rate": "9.8489e-06", "loss": 0.549, "slid_loss": 0.5783, "step": 464, "time": 299.02 }, { "epoch": 0.6, "learning_rate": "9.8482e-06", "loss": 0.5705, "slid_loss": 0.5781, "step": 465, "time": 302.64 }, { "epoch": 0.6, "learning_rate": "9.8475e-06", "loss": 0.5853, "slid_loss": 0.578, "step": 466, "time": 301.21 }, { "epoch": 0.6, "learning_rate": "9.8468e-06", "loss": 0.5954, "slid_loss": 0.5778, "step": 467, "time": 299.8 }, { "epoch": 0.61, "learning_rate": "9.8461e-06", "loss": 0.6089, "slid_loss": 0.5783, "step": 468, "time": 299.48 }, { "epoch": 0.61, "learning_rate": "9.8454e-06", "loss": 0.5906, "slid_loss": 0.5783, "step": 469, "time": 295.69 }, { "epoch": 0.61, "learning_rate": "9.8447e-06", "loss": 0.5781, "slid_loss": 0.5783, "step": 470, "time": 297.51 }, { "epoch": 0.61, "learning_rate": "9.8440e-06", "loss": 0.5976, "slid_loss": 0.5787, "step": 471, "time": 302.98 }, { "epoch": 0.61, "learning_rate": "9.8432e-06", "loss": 0.5703, "slid_loss": 0.5785, "step": 472, "time": 300.12 }, { "epoch": 0.61, "learning_rate": "9.8425e-06", "loss": 0.5825, "slid_loss": 0.5784, "step": 473, "time": 304.01 }, { "epoch": 0.61, "learning_rate": "9.8418e-06", "loss": 0.5771, "slid_loss": 0.5784, "step": 474, "time": 296.5 }, { "epoch": 0.62, "learning_rate": "9.8411e-06", "loss": 0.5687, "slid_loss": 0.5781, "step": 475, "time": 300.4 }, { "epoch": 0.62, "learning_rate": "9.8404e-06", "loss": 0.6119, "slid_loss": 0.5787, "step": 476, "time": 305.33 }, { "epoch": 0.62, "learning_rate": "9.8396e-06", "loss": 0.5623, "slid_loss": 0.5783, "step": 477, "time": 298.81 }, { "epoch": 0.62, "learning_rate": "9.8389e-06", "loss": 0.5784, "slid_loss": 0.5782, "step": 478, "time": 299.61 }, { "epoch": 0.62, "learning_rate": "9.8382e-06", "loss": 0.5713, "slid_loss": 0.5778, "step": 479, "time": 299.37 }, { "epoch": 0.62, "learning_rate": "9.8375e-06", "loss": 0.5819, "slid_loss": 0.5778, "step": 480, "time": 303.75 }, { "epoch": 0.62, "learning_rate": "9.8367e-06", "loss": 0.6179, "slid_loss": 0.5782, "step": 481, "time": 301.54 }, { "epoch": 0.62, "learning_rate": "9.8360e-06", "loss": 0.5672, "slid_loss": 0.5778, "step": 482, "time": 320.09 }, { "epoch": 0.63, "learning_rate": "9.8353e-06", "loss": 0.5843, "slid_loss": 0.5778, "step": 483, "time": 457.75 }, { "epoch": 0.63, "learning_rate": "9.8345e-06", "loss": 0.5752, "slid_loss": 0.5777, "step": 484, "time": 431.51 }, { "epoch": 0.63, "learning_rate": "9.8338e-06", "loss": 0.5523, "slid_loss": 0.5775, "step": 485, "time": 321.57 }, { "epoch": 0.63, "learning_rate": "9.8331e-06", "loss": 0.5891, "slid_loss": 0.5776, "step": 486, "time": 300.63 }, { "epoch": 0.63, "learning_rate": "9.8323e-06", "loss": 0.5725, "slid_loss": 0.5778, "step": 487, "time": 303.72 }, { "epoch": 0.63, "learning_rate": "9.8316e-06", "loss": 0.5792, "slid_loss": 0.5777, "step": 488, "time": 298.59 }, { "epoch": 0.63, "learning_rate": "9.8308e-06", "loss": 0.5648, "slid_loss": 0.5776, "step": 489, "time": 298.27 }, { "epoch": 0.63, "learning_rate": "9.8301e-06", "loss": 0.5668, "slid_loss": 0.5777, "step": 490, "time": 305.54 }, { "epoch": 0.64, "learning_rate": "9.8293e-06", "loss": 0.568, "slid_loss": 0.5777, "step": 491, "time": 303.7 }, { "epoch": 0.64, "learning_rate": "9.8286e-06", "loss": 0.5719, "slid_loss": 0.5775, "step": 492, "time": 300.89 }, { "epoch": 0.64, "learning_rate": "9.8278e-06", "loss": 0.6006, "slid_loss": 0.5776, "step": 493, "time": 297.17 }, { "epoch": 0.64, "learning_rate": "9.8271e-06", "loss": 0.6049, "slid_loss": 0.5781, "step": 494, "time": 302.1 }, { "epoch": 0.64, "learning_rate": "9.8263e-06", "loss": 0.5823, "slid_loss": 0.578, "step": 495, "time": 304.26 }, { "epoch": 0.64, "learning_rate": "9.8256e-06", "loss": 0.5863, "slid_loss": 0.5779, "step": 496, "time": 300.98 }, { "epoch": 0.64, "learning_rate": "9.8248e-06", "loss": 0.5983, "slid_loss": 0.5778, "step": 497, "time": 295.65 }, { "epoch": 0.64, "learning_rate": "9.8241e-06", "loss": 0.5871, "slid_loss": 0.578, "step": 498, "time": 301.29 }, { "epoch": 0.65, "learning_rate": "9.8233e-06", "loss": 0.5856, "slid_loss": 0.5781, "step": 499, "time": 298.92 }, { "epoch": 0.65, "learning_rate": "9.8226e-06", "loss": 0.5778, "slid_loss": 0.578, "step": 500, "time": 303.74 }, { "epoch": 0.65, "learning_rate": "9.8218e-06", "loss": 0.5678, "slid_loss": 0.5778, "step": 501, "time": 2049.88 }, { "epoch": 0.65, "learning_rate": "9.8210e-06", "loss": 0.591, "slid_loss": 0.5782, "step": 502, "time": 297.95 }, { "epoch": 0.65, "learning_rate": "9.8203e-06", "loss": 0.5714, "slid_loss": 0.578, "step": 503, "time": 302.64 }, { "epoch": 0.65, "learning_rate": "9.8195e-06", "loss": 0.5987, "slid_loss": 0.5782, "step": 504, "time": 301.99 }, { "epoch": 0.65, "learning_rate": "9.8187e-06", "loss": 0.575, "slid_loss": 0.5783, "step": 505, "time": 297.94 }, { "epoch": 0.66, "learning_rate": "9.8180e-06", "loss": 0.5822, "slid_loss": 0.5785, "step": 506, "time": 293.53 }, { "epoch": 0.66, "learning_rate": "9.8172e-06", "loss": 0.5876, "slid_loss": 0.5788, "step": 507, "time": 300.75 }, { "epoch": 0.66, "learning_rate": "9.8164e-06", "loss": 0.5721, "slid_loss": 0.5789, "step": 508, "time": 303.66 }, { "epoch": 0.66, "learning_rate": "9.8157e-06", "loss": 0.5543, "slid_loss": 0.5788, "step": 509, "time": 302.79 }, { "epoch": 0.66, "learning_rate": "9.8149e-06", "loss": 0.5652, "slid_loss": 0.5787, "step": 510, "time": 300.58 }, { "epoch": 0.66, "learning_rate": "9.8141e-06", "loss": 0.6082, "slid_loss": 0.579, "step": 511, "time": 305.49 }, { "epoch": 0.66, "learning_rate": "9.8133e-06", "loss": 0.5794, "slid_loss": 0.5792, "step": 512, "time": 301.02 }, { "epoch": 0.66, "learning_rate": "9.8125e-06", "loss": 0.5701, "slid_loss": 0.5792, "step": 513, "time": 303.23 }, { "epoch": 0.67, "learning_rate": "9.8118e-06", "loss": 0.5764, "slid_loss": 0.579, "step": 514, "time": 300.22 }, { "epoch": 0.67, "learning_rate": "9.8110e-06", "loss": 0.5837, "slid_loss": 0.5791, "step": 515, "time": 305.91 }, { "epoch": 0.67, "learning_rate": "9.8102e-06", "loss": 0.5901, "slid_loss": 0.5792, "step": 516, "time": 301.4 }, { "epoch": 0.67, "learning_rate": "9.8094e-06", "loss": 0.5621, "slid_loss": 0.579, "step": 517, "time": 296.36 }, { "epoch": 0.67, "learning_rate": "9.8086e-06", "loss": 0.5756, "slid_loss": 0.5791, "step": 518, "time": 301.91 }, { "epoch": 0.67, "learning_rate": "9.8078e-06", "loss": 0.5763, "slid_loss": 0.5792, "step": 519, "time": 301.14 }, { "epoch": 0.67, "learning_rate": "9.8070e-06", "loss": 0.5869, "slid_loss": 0.5795, "step": 520, "time": 296.65 }, { "epoch": 0.67, "learning_rate": "9.8062e-06", "loss": 0.5763, "slid_loss": 0.5798, "step": 521, "time": 299.31 }, { "epoch": 0.68, "learning_rate": "9.8054e-06", "loss": 0.5921, "slid_loss": 0.5798, "step": 522, "time": 300.99 }, { "epoch": 0.68, "learning_rate": "9.8047e-06", "loss": 0.5499, "slid_loss": 0.5793, "step": 523, "time": 300.13 }, { "epoch": 0.68, "learning_rate": "9.8039e-06", "loss": 0.5583, "slid_loss": 0.5792, "step": 524, "time": 298.62 }, { "epoch": 0.68, "learning_rate": "9.8031e-06", "loss": 0.5705, "slid_loss": 0.579, "step": 525, "time": 298.48 }, { "epoch": 0.68, "learning_rate": "9.8023e-06", "loss": 0.618, "slid_loss": 0.5794, "step": 526, "time": 296.88 }, { "epoch": 0.68, "learning_rate": "9.8015e-06", "loss": 0.5895, "slid_loss": 0.5795, "step": 527, "time": 296.39 }, { "epoch": 0.68, "learning_rate": "9.8007e-06", "loss": 0.5516, "slid_loss": 0.579, "step": 528, "time": 299.57 }, { "epoch": 0.69, "learning_rate": "9.7998e-06", "loss": 0.593, "slid_loss": 0.5792, "step": 529, "time": 302.3 }, { "epoch": 0.69, "learning_rate": "9.7990e-06", "loss": 0.5789, "slid_loss": 0.5791, "step": 530, "time": 300.1 }, { "epoch": 0.69, "learning_rate": "9.7982e-06", "loss": 0.5832, "slid_loss": 0.5788, "step": 531, "time": 300.39 }, { "epoch": 0.69, "learning_rate": "9.7974e-06", "loss": 0.5875, "slid_loss": 0.5788, "step": 532, "time": 302.23 }, { "epoch": 0.69, "learning_rate": "9.7966e-06", "loss": 0.5849, "slid_loss": 0.5787, "step": 533, "time": 304.22 }, { "epoch": 0.69, "learning_rate": "9.7958e-06", "loss": 0.574, "slid_loss": 0.5787, "step": 534, "time": 296.74 }, { "epoch": 0.69, "learning_rate": "9.7950e-06", "loss": 0.5671, "slid_loss": 0.5785, "step": 535, "time": 298.14 }, { "epoch": 0.69, "learning_rate": "9.7942e-06", "loss": 0.5839, "slid_loss": 0.5785, "step": 536, "time": 294.14 }, { "epoch": 0.7, "learning_rate": "9.7933e-06", "loss": 0.5725, "slid_loss": 0.5784, "step": 537, "time": 299.7 }, { "epoch": 0.7, "learning_rate": "9.7925e-06", "loss": 0.5664, "slid_loss": 0.5783, "step": 538, "time": 293.94 }, { "epoch": 0.7, "learning_rate": "9.7917e-06", "loss": 0.581, "slid_loss": 0.5783, "step": 539, "time": 299.01 }, { "epoch": 0.7, "learning_rate": "9.7909e-06", "loss": 0.6031, "slid_loss": 0.5786, "step": 540, "time": 299.36 }, { "epoch": 0.7, "learning_rate": "9.7901e-06", "loss": 0.5569, "slid_loss": 0.5785, "step": 541, "time": 302.66 }, { "epoch": 0.7, "learning_rate": "9.7892e-06", "loss": 0.5837, "slid_loss": 0.5784, "step": 542, "time": 297.96 }, { "epoch": 0.7, "learning_rate": "9.7884e-06", "loss": 0.5852, "slid_loss": 0.5782, "step": 543, "time": 302.07 }, { "epoch": 0.7, "learning_rate": "9.7876e-06", "loss": 0.5566, "slid_loss": 0.5779, "step": 544, "time": 299.15 }, { "epoch": 0.71, "learning_rate": "9.7868e-06", "loss": 0.5821, "slid_loss": 0.5778, "step": 545, "time": 295.31 }, { "epoch": 0.71, "learning_rate": "9.7859e-06", "loss": 0.5775, "slid_loss": 0.5783, "step": 546, "time": 302.12 }, { "epoch": 0.71, "learning_rate": "9.7851e-06", "loss": 0.6021, "slid_loss": 0.5785, "step": 547, "time": 298.46 }, { "epoch": 0.71, "learning_rate": "9.7843e-06", "loss": 0.6014, "slid_loss": 0.5786, "step": 548, "time": 297.01 }, { "epoch": 0.71, "learning_rate": "9.7834e-06", "loss": 0.5815, "slid_loss": 0.5789, "step": 549, "time": 303.58 }, { "epoch": 0.71, "learning_rate": "9.7826e-06", "loss": 0.5721, "slid_loss": 0.579, "step": 550, "time": 293.17 }, { "epoch": 0.71, "learning_rate": "9.7817e-06", "loss": 0.5768, "slid_loss": 0.5792, "step": 551, "time": 297.87 }, { "epoch": 0.71, "learning_rate": "9.7809e-06", "loss": 0.5622, "slid_loss": 0.5792, "step": 552, "time": 303.11 }, { "epoch": 0.72, "learning_rate": "9.7801e-06", "loss": 0.5484, "slid_loss": 0.5789, "step": 553, "time": 298.86 }, { "epoch": 0.72, "learning_rate": "9.7792e-06", "loss": 0.5793, "slid_loss": 0.5789, "step": 554, "time": 300.88 }, { "epoch": 0.72, "learning_rate": "9.7784e-06", "loss": 0.5794, "slid_loss": 0.5791, "step": 555, "time": 297.44 }, { "epoch": 0.72, "learning_rate": "9.7775e-06", "loss": 0.5711, "slid_loss": 0.5791, "step": 556, "time": 297.39 }, { "epoch": 0.72, "learning_rate": "9.7767e-06", "loss": 0.5841, "slid_loss": 0.579, "step": 557, "time": 299.79 }, { "epoch": 0.72, "learning_rate": "9.7758e-06", "loss": 0.5912, "slid_loss": 0.5793, "step": 558, "time": 298.87 }, { "epoch": 0.72, "learning_rate": "9.7750e-06", "loss": 0.5774, "slid_loss": 0.5791, "step": 559, "time": 301.18 }, { "epoch": 0.73, "learning_rate": "9.7741e-06", "loss": 0.5456, "slid_loss": 0.5785, "step": 560, "time": 298.72 }, { "epoch": 0.73, "learning_rate": "9.7733e-06", "loss": 0.5367, "slid_loss": 0.5782, "step": 561, "time": 299.38 }, { "epoch": 0.73, "learning_rate": "9.7724e-06", "loss": 0.5657, "slid_loss": 0.5783, "step": 562, "time": 298.86 }, { "epoch": 0.73, "learning_rate": "9.7716e-06", "loss": 0.5877, "slid_loss": 0.5787, "step": 563, "time": 303.56 }, { "epoch": 0.73, "learning_rate": "9.7707e-06", "loss": 0.5731, "slid_loss": 0.579, "step": 564, "time": 302.98 }, { "epoch": 0.73, "learning_rate": "9.7698e-06", "loss": 0.5662, "slid_loss": 0.5789, "step": 565, "time": 299.55 }, { "epoch": 0.73, "learning_rate": "9.7690e-06", "loss": 0.5598, "slid_loss": 0.5787, "step": 566, "time": 296.33 }, { "epoch": 0.73, "learning_rate": "9.7681e-06", "loss": 0.5672, "slid_loss": 0.5784, "step": 567, "time": 299.91 }, { "epoch": 0.74, "learning_rate": "9.7672e-06", "loss": 0.5767, "slid_loss": 0.5781, "step": 568, "time": 300.64 }, { "epoch": 0.74, "learning_rate": "9.7664e-06", "loss": 0.5659, "slid_loss": 0.5778, "step": 569, "time": 302.66 }, { "epoch": 0.74, "learning_rate": "9.7655e-06", "loss": 0.5682, "slid_loss": 0.5777, "step": 570, "time": 298.52 }, { "epoch": 0.74, "learning_rate": "9.7646e-06", "loss": 0.5822, "slid_loss": 0.5776, "step": 571, "time": 302.79 }, { "epoch": 0.74, "learning_rate": "9.7638e-06", "loss": 0.587, "slid_loss": 0.5777, "step": 572, "time": 303.17 }, { "epoch": 0.74, "learning_rate": "9.7629e-06", "loss": 0.5786, "slid_loss": 0.5777, "step": 573, "time": 295.3 }, { "epoch": 0.74, "learning_rate": "9.7620e-06", "loss": 0.6122, "slid_loss": 0.578, "step": 574, "time": 303.38 }, { "epoch": 0.74, "learning_rate": "9.7611e-06", "loss": 0.5696, "slid_loss": 0.5781, "step": 575, "time": 300.1 }, { "epoch": 0.75, "learning_rate": "9.7603e-06", "loss": 0.5628, "slid_loss": 0.5776, "step": 576, "time": 300.2 }, { "epoch": 0.75, "learning_rate": "9.7594e-06", "loss": 0.5719, "slid_loss": 0.5777, "step": 577, "time": 300.67 }, { "epoch": 0.75, "learning_rate": "9.7585e-06", "loss": 0.592, "slid_loss": 0.5778, "step": 578, "time": 302.63 }, { "epoch": 0.75, "learning_rate": "9.7576e-06", "loss": 0.5533, "slid_loss": 0.5776, "step": 579, "time": 303.71 }, { "epoch": 0.75, "learning_rate": "9.7567e-06", "loss": 0.5822, "slid_loss": 0.5776, "step": 580, "time": 301.66 }, { "epoch": 0.75, "learning_rate": "9.7559e-06", "loss": 0.5546, "slid_loss": 0.577, "step": 581, "time": 299.5 }, { "epoch": 0.75, "learning_rate": "9.7550e-06", "loss": 0.5803, "slid_loss": 0.5771, "step": 582, "time": 299.14 }, { "epoch": 0.75, "learning_rate": "9.7541e-06", "loss": 0.5686, "slid_loss": 0.577, "step": 583, "time": 304.85 }, { "epoch": 0.76, "learning_rate": "9.7532e-06", "loss": 0.5827, "slid_loss": 0.577, "step": 584, "time": 304.42 }, { "epoch": 0.76, "learning_rate": "9.7523e-06", "loss": 0.5796, "slid_loss": 0.5773, "step": 585, "time": 304.0 }, { "epoch": 0.76, "learning_rate": "9.7514e-06", "loss": 0.5625, "slid_loss": 0.577, "step": 586, "time": 297.89 }, { "epoch": 0.76, "learning_rate": "9.7505e-06", "loss": 0.5733, "slid_loss": 0.5771, "step": 587, "time": 298.03 }, { "epoch": 0.76, "learning_rate": "9.7496e-06", "loss": 0.5815, "slid_loss": 0.5771, "step": 588, "time": 303.5 }, { "epoch": 0.76, "learning_rate": "9.7487e-06", "loss": 0.5898, "slid_loss": 0.5773, "step": 589, "time": 301.59 }, { "epoch": 0.76, "learning_rate": "9.7478e-06", "loss": 0.5994, "slid_loss": 0.5777, "step": 590, "time": 303.28 }, { "epoch": 0.77, "learning_rate": "9.7469e-06", "loss": 0.5832, "slid_loss": 0.5778, "step": 591, "time": 294.78 }, { "epoch": 0.77, "learning_rate": "9.7460e-06", "loss": 0.5676, "slid_loss": 0.5778, "step": 592, "time": 304.99 }, { "epoch": 0.77, "learning_rate": "9.7451e-06", "loss": 0.5801, "slid_loss": 0.5776, "step": 593, "time": 298.88 }, { "epoch": 0.77, "learning_rate": "9.7442e-06", "loss": 0.5912, "slid_loss": 0.5774, "step": 594, "time": 295.82 }, { "epoch": 0.77, "learning_rate": "9.7433e-06", "loss": 0.5739, "slid_loss": 0.5773, "step": 595, "time": 300.49 }, { "epoch": 0.77, "learning_rate": "9.7424e-06", "loss": 0.5688, "slid_loss": 0.5772, "step": 596, "time": 294.8 }, { "epoch": 0.77, "learning_rate": "9.7415e-06", "loss": 0.5834, "slid_loss": 0.577, "step": 597, "time": 303.61 }, { "epoch": 0.77, "learning_rate": "9.7406e-06", "loss": 0.5988, "slid_loss": 0.5771, "step": 598, "time": 296.42 }, { "epoch": 0.78, "learning_rate": "9.7397e-06", "loss": 0.578, "slid_loss": 0.5771, "step": 599, "time": 293.08 }, { "epoch": 0.78, "learning_rate": "9.7387e-06", "loss": 0.5955, "slid_loss": 0.5772, "step": 600, "time": 299.55 }, { "epoch": 0.78, "learning_rate": "9.7378e-06", "loss": 0.5693, "slid_loss": 0.5772, "step": 601, "time": 2053.43 }, { "epoch": 0.78, "learning_rate": "9.7369e-06", "loss": 0.5697, "slid_loss": 0.577, "step": 602, "time": 297.5 }, { "epoch": 0.78, "learning_rate": "9.7360e-06", "loss": 0.5897, "slid_loss": 0.5772, "step": 603, "time": 355.72 }, { "epoch": 0.78, "learning_rate": "9.7351e-06", "loss": 0.5582, "slid_loss": 0.5768, "step": 604, "time": 543.71 }, { "epoch": 0.78, "learning_rate": "9.7341e-06", "loss": 0.5588, "slid_loss": 0.5766, "step": 605, "time": 450.24 }, { "epoch": 0.78, "learning_rate": "9.7332e-06", "loss": 0.5721, "slid_loss": 0.5765, "step": 606, "time": 309.18 }, { "epoch": 0.79, "learning_rate": "9.7323e-06", "loss": 0.5639, "slid_loss": 0.5763, "step": 607, "time": 300.7 }, { "epoch": 0.79, "learning_rate": "9.7314e-06", "loss": 0.567, "slid_loss": 0.5763, "step": 608, "time": 301.61 }, { "epoch": 0.79, "learning_rate": "9.7304e-06", "loss": 0.5601, "slid_loss": 0.5763, "step": 609, "time": 300.66 }, { "epoch": 0.79, "learning_rate": "9.7295e-06", "loss": 0.5744, "slid_loss": 0.5764, "step": 610, "time": 296.57 }, { "epoch": 0.79, "learning_rate": "9.7286e-06", "loss": 0.5749, "slid_loss": 0.5761, "step": 611, "time": 294.55 }, { "epoch": 0.79, "learning_rate": "9.7277e-06", "loss": 0.5752, "slid_loss": 0.576, "step": 612, "time": 296.3 }, { "epoch": 0.79, "learning_rate": "9.7267e-06", "loss": 0.5614, "slid_loss": 0.5759, "step": 613, "time": 298.5 }, { "epoch": 0.8, "learning_rate": "9.7258e-06", "loss": 0.5752, "slid_loss": 0.5759, "step": 614, "time": 292.6 }, { "epoch": 0.8, "learning_rate": "9.7249e-06", "loss": 0.587, "slid_loss": 0.576, "step": 615, "time": 297.26 }, { "epoch": 0.8, "learning_rate": "9.7239e-06", "loss": 0.5631, "slid_loss": 0.5757, "step": 616, "time": 307.76 }, { "epoch": 0.8, "learning_rate": "9.7230e-06", "loss": 0.6094, "slid_loss": 0.5762, "step": 617, "time": 300.4 }, { "epoch": 0.8, "learning_rate": "9.7220e-06", "loss": 0.5581, "slid_loss": 0.576, "step": 618, "time": 298.17 }, { "epoch": 0.8, "learning_rate": "9.7211e-06", "loss": 0.5693, "slid_loss": 0.5759, "step": 619, "time": 299.92 }, { "epoch": 0.8, "learning_rate": "9.7201e-06", "loss": 0.5682, "slid_loss": 0.5757, "step": 620, "time": 302.01 }, { "epoch": 0.8, "learning_rate": "9.7192e-06", "loss": 0.5668, "slid_loss": 0.5756, "step": 621, "time": 302.89 }, { "epoch": 0.81, "learning_rate": "9.7183e-06", "loss": 0.5831, "slid_loss": 0.5755, "step": 622, "time": 300.0 }, { "epoch": 0.81, "learning_rate": "9.7173e-06", "loss": 0.5892, "slid_loss": 0.5759, "step": 623, "time": 297.76 }, { "epoch": 0.81, "learning_rate": "9.7164e-06", "loss": 0.5668, "slid_loss": 0.576, "step": 624, "time": 302.83 }, { "epoch": 0.81, "learning_rate": "9.7154e-06", "loss": 0.5879, "slid_loss": 0.5762, "step": 625, "time": 308.26 }, { "epoch": 0.81, "learning_rate": "9.7144e-06", "loss": 0.5769, "slid_loss": 0.5758, "step": 626, "time": 298.8 }, { "epoch": 0.81, "learning_rate": "9.7135e-06", "loss": 0.5729, "slid_loss": 0.5756, "step": 627, "time": 302.73 }, { "epoch": 0.81, "learning_rate": "9.7125e-06", "loss": 0.5622, "slid_loss": 0.5757, "step": 628, "time": 304.61 }, { "epoch": 0.81, "learning_rate": "9.7116e-06", "loss": 0.5882, "slid_loss": 0.5757, "step": 629, "time": 302.35 }, { "epoch": 0.82, "learning_rate": "9.7106e-06", "loss": 0.5668, "slid_loss": 0.5756, "step": 630, "time": 301.02 }, { "epoch": 0.82, "learning_rate": "9.7097e-06", "loss": 0.5877, "slid_loss": 0.5756, "step": 631, "time": 300.32 }, { "epoch": 0.82, "learning_rate": "9.7087e-06", "loss": 0.5718, "slid_loss": 0.5754, "step": 632, "time": 293.92 }, { "epoch": 0.82, "learning_rate": "9.7077e-06", "loss": 0.5553, "slid_loss": 0.5752, "step": 633, "time": 296.84 }, { "epoch": 0.82, "learning_rate": "9.7068e-06", "loss": 0.5692, "slid_loss": 0.5751, "step": 634, "time": 303.08 }, { "epoch": 0.82, "learning_rate": "9.7058e-06", "loss": 0.5597, "slid_loss": 0.575, "step": 635, "time": 304.45 }, { "epoch": 0.82, "learning_rate": "9.7048e-06", "loss": 0.5771, "slid_loss": 0.575, "step": 636, "time": 300.98 }, { "epoch": 0.82, "learning_rate": "9.7039e-06", "loss": 0.5856, "slid_loss": 0.5751, "step": 637, "time": 303.39 }, { "epoch": 0.83, "learning_rate": "9.7029e-06", "loss": 0.5705, "slid_loss": 0.5751, "step": 638, "time": 300.56 }, { "epoch": 0.83, "learning_rate": "9.7019e-06", "loss": 0.5753, "slid_loss": 0.5751, "step": 639, "time": 302.55 }, { "epoch": 0.83, "learning_rate": "9.7009e-06", "loss": 0.5651, "slid_loss": 0.5747, "step": 640, "time": 300.37 }, { "epoch": 0.83, "learning_rate": "9.7000e-06", "loss": 0.5477, "slid_loss": 0.5746, "step": 641, "time": 299.12 }, { "epoch": 0.83, "learning_rate": "9.6990e-06", "loss": 0.5573, "slid_loss": 0.5743, "step": 642, "time": 302.26 }, { "epoch": 0.83, "learning_rate": "9.6980e-06", "loss": 0.5694, "slid_loss": 0.5742, "step": 643, "time": 300.38 }, { "epoch": 0.83, "learning_rate": "9.6970e-06", "loss": 0.596, "slid_loss": 0.5746, "step": 644, "time": 303.72 }, { "epoch": 0.84, "learning_rate": "9.6961e-06", "loss": 0.568, "slid_loss": 0.5744, "step": 645, "time": 299.1 }, { "epoch": 0.84, "learning_rate": "9.6951e-06", "loss": 0.5573, "slid_loss": 0.5742, "step": 646, "time": 296.37 }, { "epoch": 0.84, "learning_rate": "9.6941e-06", "loss": 0.572, "slid_loss": 0.5739, "step": 647, "time": 302.76 }, { "epoch": 0.84, "learning_rate": "9.6931e-06", "loss": 0.6008, "slid_loss": 0.5739, "step": 648, "time": 304.74 }, { "epoch": 0.84, "learning_rate": "9.6921e-06", "loss": 0.5735, "slid_loss": 0.5738, "step": 649, "time": 303.11 }, { "epoch": 0.84, "learning_rate": "9.6911e-06", "loss": 0.5629, "slid_loss": 0.5738, "step": 650, "time": 301.38 }, { "epoch": 0.84, "learning_rate": "9.6901e-06", "loss": 0.5738, "slid_loss": 0.5737, "step": 651, "time": 303.03 }, { "epoch": 0.84, "learning_rate": "9.6891e-06", "loss": 0.5789, "slid_loss": 0.5739, "step": 652, "time": 299.84 }, { "epoch": 0.85, "learning_rate": "9.6881e-06", "loss": 0.5989, "slid_loss": 0.5744, "step": 653, "time": 304.95 }, { "epoch": 0.85, "learning_rate": "9.6872e-06", "loss": 0.5602, "slid_loss": 0.5742, "step": 654, "time": 305.98 }, { "epoch": 0.85, "learning_rate": "9.6862e-06", "loss": 0.5695, "slid_loss": 0.5741, "step": 655, "time": 299.64 }, { "epoch": 0.85, "learning_rate": "9.6852e-06", "loss": 0.5742, "slid_loss": 0.5741, "step": 656, "time": 300.47 }, { "epoch": 0.85, "learning_rate": "9.6842e-06", "loss": 0.5649, "slid_loss": 0.5739, "step": 657, "time": 299.06 }, { "epoch": 0.85, "learning_rate": "9.6832e-06", "loss": 0.5553, "slid_loss": 0.5736, "step": 658, "time": 302.71 }, { "epoch": 0.85, "learning_rate": "9.6822e-06", "loss": 0.5709, "slid_loss": 0.5735, "step": 659, "time": 302.34 }, { "epoch": 0.85, "learning_rate": "9.6812e-06", "loss": 0.5565, "slid_loss": 0.5736, "step": 660, "time": 298.23 }, { "epoch": 0.86, "learning_rate": "9.6801e-06", "loss": 0.573, "slid_loss": 0.574, "step": 661, "time": 295.97 }, { "epoch": 0.86, "learning_rate": "9.6791e-06", "loss": 0.5797, "slid_loss": 0.5741, "step": 662, "time": 299.7 }, { "epoch": 0.86, "learning_rate": "9.6781e-06", "loss": 0.5464, "slid_loss": 0.5737, "step": 663, "time": 299.82 }, { "epoch": 0.86, "learning_rate": "9.6771e-06", "loss": 0.5701, "slid_loss": 0.5737, "step": 664, "time": 305.05 }, { "epoch": 0.86, "learning_rate": "9.6761e-06", "loss": 0.5567, "slid_loss": 0.5736, "step": 665, "time": 299.79 }, { "epoch": 0.86, "learning_rate": "9.6751e-06", "loss": 0.5588, "slid_loss": 0.5736, "step": 666, "time": 300.74 }, { "epoch": 0.86, "learning_rate": "9.6741e-06", "loss": 0.5927, "slid_loss": 0.5738, "step": 667, "time": 300.0 }, { "epoch": 0.87, "learning_rate": "9.6731e-06", "loss": 0.5572, "slid_loss": 0.5736, "step": 668, "time": 301.77 }, { "epoch": 0.87, "learning_rate": "9.6721e-06", "loss": 0.5461, "slid_loss": 0.5734, "step": 669, "time": 297.96 }, { "epoch": 0.87, "learning_rate": "9.6710e-06", "loss": 0.564, "slid_loss": 0.5734, "step": 670, "time": 301.97 }, { "epoch": 0.87, "learning_rate": "9.6700e-06", "loss": 0.5656, "slid_loss": 0.5732, "step": 671, "time": 299.58 }, { "epoch": 0.87, "learning_rate": "9.6690e-06", "loss": 0.5534, "slid_loss": 0.5729, "step": 672, "time": 296.02 }, { "epoch": 0.87, "learning_rate": "9.6680e-06", "loss": 0.5292, "slid_loss": 0.5724, "step": 673, "time": 302.8 }, { "epoch": 0.87, "learning_rate": "9.6669e-06", "loss": 0.5549, "slid_loss": 0.5718, "step": 674, "time": 301.94 }, { "epoch": 0.87, "learning_rate": "9.6659e-06", "loss": 0.5647, "slid_loss": 0.5718, "step": 675, "time": 300.67 }, { "epoch": 0.88, "learning_rate": "9.6649e-06", "loss": 0.555, "slid_loss": 0.5717, "step": 676, "time": 296.84 }, { "epoch": 0.88, "learning_rate": "9.6639e-06", "loss": 0.5872, "slid_loss": 0.5719, "step": 677, "time": 303.18 }, { "epoch": 0.88, "learning_rate": "9.6628e-06", "loss": 0.5538, "slid_loss": 0.5715, "step": 678, "time": 303.01 }, { "epoch": 0.88, "learning_rate": "9.6618e-06", "loss": 0.5584, "slid_loss": 0.5715, "step": 679, "time": 302.31 }, { "epoch": 0.88, "learning_rate": "9.6608e-06", "loss": 0.5835, "slid_loss": 0.5715, "step": 680, "time": 302.48 }, { "epoch": 0.88, "learning_rate": "9.6597e-06", "loss": 0.5938, "slid_loss": 0.5719, "step": 681, "time": 298.4 }, { "epoch": 0.88, "learning_rate": "9.6587e-06", "loss": 0.5878, "slid_loss": 0.572, "step": 682, "time": 300.58 }, { "epoch": 0.88, "learning_rate": "9.6577e-06", "loss": 0.5725, "slid_loss": 0.572, "step": 683, "time": 302.24 }, { "epoch": 0.89, "learning_rate": "9.6566e-06", "loss": 0.5648, "slid_loss": 0.5719, "step": 684, "time": 301.51 }, { "epoch": 0.89, "learning_rate": "9.6556e-06", "loss": 0.5772, "slid_loss": 0.5718, "step": 685, "time": 298.21 }, { "epoch": 0.89, "learning_rate": "9.6545e-06", "loss": 0.5852, "slid_loss": 0.5721, "step": 686, "time": 302.79 }, { "epoch": 0.89, "learning_rate": "9.6535e-06", "loss": 0.5793, "slid_loss": 0.5721, "step": 687, "time": 299.38 }, { "epoch": 0.89, "learning_rate": "9.6525e-06", "loss": 0.597, "slid_loss": 0.5723, "step": 688, "time": 304.05 }, { "epoch": 0.89, "learning_rate": "9.6514e-06", "loss": 0.5599, "slid_loss": 0.572, "step": 689, "time": 297.95 }, { "epoch": 0.89, "learning_rate": "9.6504e-06", "loss": 0.5822, "slid_loss": 0.5718, "step": 690, "time": 304.74 }, { "epoch": 0.89, "learning_rate": "9.6493e-06", "loss": 0.5838, "slid_loss": 0.5718, "step": 691, "time": 302.75 }, { "epoch": 0.9, "learning_rate": "9.6483e-06", "loss": 0.5442, "slid_loss": 0.5716, "step": 692, "time": 300.4 }, { "epoch": 0.9, "learning_rate": "9.6472e-06", "loss": 0.575, "slid_loss": 0.5715, "step": 693, "time": 302.57 }, { "epoch": 0.9, "learning_rate": "9.6462e-06", "loss": 0.5481, "slid_loss": 0.5711, "step": 694, "time": 302.08 }, { "epoch": 0.9, "learning_rate": "9.6451e-06", "loss": 0.5555, "slid_loss": 0.5709, "step": 695, "time": 296.83 }, { "epoch": 0.9, "learning_rate": "9.6440e-06", "loss": 0.5521, "slid_loss": 0.5708, "step": 696, "time": 305.62 }, { "epoch": 0.9, "learning_rate": "9.6430e-06", "loss": 0.5705, "slid_loss": 0.5706, "step": 697, "time": 297.63 }, { "epoch": 0.9, "learning_rate": "9.6419e-06", "loss": 0.5724, "slid_loss": 0.5704, "step": 698, "time": 296.55 }, { "epoch": 0.91, "learning_rate": "9.6409e-06", "loss": 0.5564, "slid_loss": 0.5701, "step": 699, "time": 301.49 }, { "epoch": 0.91, "learning_rate": "9.6398e-06", "loss": 0.5864, "slid_loss": 0.5701, "step": 700, "time": 306.47 } ], "logging_steps": 1.0, "max_steps": 3860, "num_train_epochs": 5, "save_steps": 50000.0, "total_flos": 0.0, "trial_name": null, "trial_params": null }