{ "best_metric": 1.012279748916626, "best_model_checkpoint": "./output_c/checkpoint-824976", "epoch": 48.0, "global_step": 824976, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "learning_rate": 1.960020946063886e-05, "loss": 3.5114, "step": 17186 }, { "epoch": 1.0, "eval_loss": 2.5249948501586914, "eval_runtime": 71.153, "eval_samples_per_second": 965.735, "eval_steps_per_second": 30.188, "step": 17187 }, { "epoch": 2.0, "learning_rate": 1.920034910106476e-05, "loss": 2.4281, "step": 34372 }, { "epoch": 2.0, "eval_loss": 2.0742549896240234, "eval_runtime": 71.0702, "eval_samples_per_second": 966.861, "eval_steps_per_second": 30.224, "step": 34374 }, { "epoch": 3.0, "learning_rate": 1.8800535288299298e-05, "loss": 2.0989, "step": 51558 }, { "epoch": 3.0, "eval_loss": 1.8547435998916626, "eval_runtime": 71.5379, "eval_samples_per_second": 960.54, "eval_steps_per_second": 30.026, "step": 51561 }, { "epoch": 4.0, "learning_rate": 1.8400698202129518e-05, "loss": 1.9144, "step": 68744 }, { "epoch": 4.0, "eval_loss": 1.7082182168960571, "eval_runtime": 71.3486, "eval_samples_per_second": 963.089, "eval_steps_per_second": 30.106, "step": 68748 }, { "epoch": 5.0, "learning_rate": 1.8000861115959738e-05, "loss": 1.7926, "step": 85930 }, { "epoch": 5.0, "eval_loss": 1.614298701286316, "eval_runtime": 72.4304, "eval_samples_per_second": 948.703, "eval_steps_per_second": 29.656, "step": 85935 }, { "epoch": 6.0, "learning_rate": 1.7601047303194276e-05, "loss": 1.7014, "step": 103116 }, { "epoch": 6.0, "eval_loss": 1.5449920892715454, "eval_runtime": 71.3467, "eval_samples_per_second": 963.114, "eval_steps_per_second": 30.107, "step": 103122 }, { "epoch": 7.0, "learning_rate": 1.7201210217024496e-05, "loss": 1.6306, "step": 120302 }, { "epoch": 7.0, "eval_loss": 1.4825724363327026, "eval_runtime": 71.2883, "eval_samples_per_second": 963.903, "eval_steps_per_second": 30.131, "step": 120309 }, { "epoch": 8.0, "learning_rate": 1.6801373130854716e-05, "loss": 1.5745, "step": 137488 }, { "epoch": 8.0, "eval_loss": 1.4376918077468872, "eval_runtime": 72.4942, "eval_samples_per_second": 947.869, "eval_steps_per_second": 29.63, "step": 137496 }, { "epoch": 9.0, "learning_rate": 1.6401559318089254e-05, "loss": 1.5262, "step": 154674 }, { "epoch": 9.0, "eval_loss": 1.3938175439834595, "eval_runtime": 71.2189, "eval_samples_per_second": 964.842, "eval_steps_per_second": 30.161, "step": 154683 }, { "epoch": 10.0, "learning_rate": 1.6001722231919474e-05, "loss": 1.4839, "step": 171860 }, { "epoch": 10.0, "eval_loss": 1.363573431968689, "eval_runtime": 71.173, "eval_samples_per_second": 965.465, "eval_steps_per_second": 30.18, "step": 171870 }, { "epoch": 11.0, "learning_rate": 1.5601908419154012e-05, "loss": 1.4505, "step": 189046 }, { "epoch": 11.0, "eval_loss": 1.3312129974365234, "eval_runtime": 71.256, "eval_samples_per_second": 964.339, "eval_steps_per_second": 30.145, "step": 189057 }, { "epoch": 12.0, "learning_rate": 1.5202071332984234e-05, "loss": 1.4187, "step": 206232 }, { "epoch": 12.0, "eval_loss": 1.3099381923675537, "eval_runtime": 71.1495, "eval_samples_per_second": 965.783, "eval_steps_per_second": 30.19, "step": 206244 }, { "epoch": 13.0, "learning_rate": 1.480225752021877e-05, "loss": 1.39, "step": 223418 }, { "epoch": 13.0, "eval_loss": 1.2823187112808228, "eval_runtime": 69.4163, "eval_samples_per_second": 989.898, "eval_steps_per_second": 30.944, "step": 223431 }, { "epoch": 14.0, "learning_rate": 1.4402420434048992e-05, "loss": 1.3651, "step": 240604 }, { "epoch": 14.0, "eval_loss": 1.2610409259796143, "eval_runtime": 69.462, "eval_samples_per_second": 989.246, "eval_steps_per_second": 30.923, "step": 240618 }, { "epoch": 15.0, "learning_rate": 1.4002583347879212e-05, "loss": 1.3436, "step": 257790 }, { "epoch": 15.0, "eval_loss": 1.2433568239212036, "eval_runtime": 69.4357, "eval_samples_per_second": 989.621, "eval_steps_per_second": 30.935, "step": 257805 }, { "epoch": 16.0, "learning_rate": 1.3602722988305115e-05, "loss": 1.3245, "step": 274976 }, { "epoch": 16.0, "eval_loss": 1.2258530855178833, "eval_runtime": 69.4442, "eval_samples_per_second": 989.499, "eval_steps_per_second": 30.931, "step": 274992 }, { "epoch": 17.0, "learning_rate": 1.3202885902135337e-05, "loss": 1.305, "step": 292162 }, { "epoch": 17.0, "eval_loss": 1.2092918157577515, "eval_runtime": 69.4339, "eval_samples_per_second": 989.646, "eval_steps_per_second": 30.936, "step": 292179 }, { "epoch": 18.0, "learning_rate": 1.2803072089369875e-05, "loss": 1.2878, "step": 309348 }, { "epoch": 18.0, "eval_loss": 1.1935821771621704, "eval_runtime": 69.4288, "eval_samples_per_second": 989.72, "eval_steps_per_second": 30.938, "step": 309366 }, { "epoch": 19.0, "learning_rate": 1.2403304823413045e-05, "loss": 1.2716, "step": 326534 }, { "epoch": 19.0, "eval_loss": 1.1804686784744263, "eval_runtime": 69.4238, "eval_samples_per_second": 989.791, "eval_steps_per_second": 30.94, "step": 326553 }, { "epoch": 20.0, "learning_rate": 1.2003421190434631e-05, "loss": 1.2577, "step": 343720 }, { "epoch": 20.0, "eval_loss": 1.170041799545288, "eval_runtime": 69.4213, "eval_samples_per_second": 989.826, "eval_steps_per_second": 30.942, "step": 343740 }, { "epoch": 21.0, "learning_rate": 1.1603584104264853e-05, "loss": 1.2451, "step": 360906 }, { "epoch": 21.0, "eval_loss": 1.1530039310455322, "eval_runtime": 69.5559, "eval_samples_per_second": 987.911, "eval_steps_per_second": 30.882, "step": 360927 }, { "epoch": 22.0, "learning_rate": 1.1203747018095073e-05, "loss": 1.2312, "step": 378092 }, { "epoch": 22.0, "eval_loss": 1.1468385457992554, "eval_runtime": 70.6551, "eval_samples_per_second": 972.542, "eval_steps_per_second": 30.401, "step": 378114 }, { "epoch": 23.0, "learning_rate": 1.0803933205329611e-05, "loss": 1.2189, "step": 395278 }, { "epoch": 23.0, "eval_loss": 1.1345593929290771, "eval_runtime": 69.4572, "eval_samples_per_second": 989.315, "eval_steps_per_second": 30.926, "step": 395301 }, { "epoch": 24.0, "learning_rate": 1.040411939256415e-05, "loss": 1.2081, "step": 412464 }, { "epoch": 24.0, "eval_loss": 1.1272401809692383, "eval_runtime": 69.4251, "eval_samples_per_second": 989.772, "eval_steps_per_second": 30.94, "step": 412488 }, { "epoch": 25.0, "learning_rate": 1.0004305579798686e-05, "loss": 1.1972, "step": 429650 }, { "epoch": 25.0, "eval_loss": 1.1170649528503418, "eval_runtime": 69.594, "eval_samples_per_second": 987.37, "eval_steps_per_second": 30.865, "step": 429675 }, { "epoch": 26.0, "learning_rate": 9.604491767033224e-06, "loss": 1.187, "step": 446836 }, { "epoch": 26.0, "eval_loss": 1.1084502935409546, "eval_runtime": 69.4386, "eval_samples_per_second": 989.579, "eval_steps_per_second": 30.934, "step": 446862 }, { "epoch": 27.0, "learning_rate": 9.204654680863444e-06, "loss": 1.1777, "step": 464022 }, { "epoch": 27.0, "eval_loss": 1.100696086883545, "eval_runtime": 69.6537, "eval_samples_per_second": 986.524, "eval_steps_per_second": 30.838, "step": 464049 }, { "epoch": 28.0, "learning_rate": 8.804840868097981e-06, "loss": 1.1691, "step": 481208 }, { "epoch": 28.0, "eval_loss": 1.0973334312438965, "eval_runtime": 69.4707, "eval_samples_per_second": 989.122, "eval_steps_per_second": 30.92, "step": 481236 }, { "epoch": 29.0, "learning_rate": 8.40502705533252e-06, "loss": 1.1611, "step": 498394 }, { "epoch": 29.0, "eval_loss": 1.0852068662643433, "eval_runtime": 69.6222, "eval_samples_per_second": 986.969, "eval_steps_per_second": 30.852, "step": 498423 }, { "epoch": 30.0, "learning_rate": 8.005189969162739e-06, "loss": 1.1534, "step": 515580 }, { "epoch": 30.0, "eval_loss": 1.082985520362854, "eval_runtime": 69.4202, "eval_samples_per_second": 989.842, "eval_steps_per_second": 30.942, "step": 515610 }, { "epoch": 31.0, "learning_rate": 7.6053761563972775e-06, "loss": 1.1453, "step": 532766 }, { "epoch": 31.0, "eval_loss": 1.0722641944885254, "eval_runtime": 69.4459, "eval_samples_per_second": 989.476, "eval_steps_per_second": 30.931, "step": 532797 }, { "epoch": 32.0, "learning_rate": 7.205539070227498e-06, "loss": 1.1387, "step": 549952 }, { "epoch": 32.0, "eval_loss": 1.0714243650436401, "eval_runtime": 69.4689, "eval_samples_per_second": 989.147, "eval_steps_per_second": 30.92, "step": 549984 }, { "epoch": 33.0, "learning_rate": 6.805678710653402e-06, "loss": 1.1304, "step": 567138 }, { "epoch": 33.0, "eval_loss": 1.0586808919906616, "eval_runtime": 69.5198, "eval_samples_per_second": 988.423, "eval_steps_per_second": 30.898, "step": 567171 }, { "epoch": 34.0, "learning_rate": 6.405864897887939e-06, "loss": 1.1243, "step": 584324 }, { "epoch": 34.0, "eval_loss": 1.0512875318527222, "eval_runtime": 69.4412, "eval_samples_per_second": 989.542, "eval_steps_per_second": 30.933, "step": 584358 }, { "epoch": 35.0, "learning_rate": 6.006051085122476e-06, "loss": 1.1192, "step": 601510 }, { "epoch": 35.0, "eval_loss": 1.0537949800491333, "eval_runtime": 69.726, "eval_samples_per_second": 985.5, "eval_steps_per_second": 30.806, "step": 601545 }, { "epoch": 36.0, "learning_rate": 5.606213998952698e-06, "loss": 1.1126, "step": 618696 }, { "epoch": 36.0, "eval_loss": 1.049811840057373, "eval_runtime": 69.9084, "eval_samples_per_second": 982.929, "eval_steps_per_second": 30.726, "step": 618732 }, { "epoch": 37.0, "learning_rate": 5.206446732995869e-06, "loss": 1.1083, "step": 635882 }, { "epoch": 37.0, "eval_loss": 1.0433541536331177, "eval_runtime": 69.7631, "eval_samples_per_second": 984.977, "eval_steps_per_second": 30.79, "step": 635919 }, { "epoch": 38.0, "learning_rate": 4.8066096468260895e-06, "loss": 1.1037, "step": 653068 }, { "epoch": 38.0, "eval_loss": 1.0362184047698975, "eval_runtime": 69.5998, "eval_samples_per_second": 987.287, "eval_steps_per_second": 30.862, "step": 653106 }, { "epoch": 39.0, "learning_rate": 4.4067492872519926e-06, "loss": 1.0997, "step": 670254 }, { "epoch": 39.0, "eval_loss": 1.037041187286377, "eval_runtime": 69.6134, "eval_samples_per_second": 987.095, "eval_steps_per_second": 30.856, "step": 670293 }, { "epoch": 40.0, "learning_rate": 4.006935474486531e-06, "loss": 1.0952, "step": 687440 }, { "epoch": 40.0, "eval_loss": 1.0285437107086182, "eval_runtime": 69.7372, "eval_samples_per_second": 985.341, "eval_steps_per_second": 30.801, "step": 687480 }, { "epoch": 41.0, "learning_rate": 3.6071216617210684e-06, "loss": 1.0891, "step": 704626 }, { "epoch": 41.0, "eval_loss": 1.0275565385818481, "eval_runtime": 69.6129, "eval_samples_per_second": 987.101, "eval_steps_per_second": 30.856, "step": 704667 }, { "epoch": 42.0, "learning_rate": 3.207261302146972e-06, "loss": 1.086, "step": 721812 }, { "epoch": 42.0, "eval_loss": 1.0269238948822021, "eval_runtime": 69.6407, "eval_samples_per_second": 986.708, "eval_steps_per_second": 30.844, "step": 721854 }, { "epoch": 43.0, "learning_rate": 2.8074707627858266e-06, "loss": 1.0831, "step": 738998 }, { "epoch": 43.0, "eval_loss": 1.0213665962219238, "eval_runtime": 69.6166, "eval_samples_per_second": 987.049, "eval_steps_per_second": 30.855, "step": 739041 }, { "epoch": 44.0, "learning_rate": 2.4076569500203645e-06, "loss": 1.0791, "step": 756184 }, { "epoch": 44.0, "eval_loss": 1.020618200302124, "eval_runtime": 69.8024, "eval_samples_per_second": 984.422, "eval_steps_per_second": 30.773, "step": 756228 }, { "epoch": 45.0, "learning_rate": 2.007819863850585e-06, "loss": 1.077, "step": 773370 }, { "epoch": 45.0, "eval_loss": 1.0169862508773804, "eval_runtime": 69.6571, "eval_samples_per_second": 986.475, "eval_steps_per_second": 30.837, "step": 773415 }, { "epoch": 46.0, "learning_rate": 1.608052597893757e-06, "loss": 1.0739, "step": 790556 }, { "epoch": 46.0, "eval_loss": 1.0156688690185547, "eval_runtime": 69.7503, "eval_samples_per_second": 985.157, "eval_steps_per_second": 30.796, "step": 790602 }, { "epoch": 47.0, "learning_rate": 1.2081922383196604e-06, "loss": 1.0709, "step": 807742 }, { "epoch": 47.0, "eval_loss": 1.0143157243728638, "eval_runtime": 69.6201, "eval_samples_per_second": 986.999, "eval_steps_per_second": 30.853, "step": 807789 }, { "epoch": 48.0, "learning_rate": 8.084249723628325e-07, "loss": 1.0693, "step": 824928 }, { "epoch": 48.0, "eval_loss": 1.012279748916626, "eval_runtime": 69.6307, "eval_samples_per_second": 986.849, "eval_steps_per_second": 30.848, "step": 824976 } ], "max_steps": 859350, "num_train_epochs": 50, "total_flos": 1.7374699189290516e+18, "trial_name": null, "trial_params": null }