{
  "best_metric": 1.012279748916626,
  "best_model_checkpoint": "./output_c/checkpoint-824976",
  "epoch": 48.0,
  "global_step": 824976,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "learning_rate": 1.960020946063886e-05,
      "loss": 3.5114,
      "step": 17186
    },
    {
      "epoch": 1.0,
      "eval_loss": 2.5249948501586914,
      "eval_runtime": 71.153,
      "eval_samples_per_second": 965.735,
      "eval_steps_per_second": 30.188,
      "step": 17187
    },
    {
      "epoch": 2.0,
      "learning_rate": 1.920034910106476e-05,
      "loss": 2.4281,
      "step": 34372
    },
    {
      "epoch": 2.0,
      "eval_loss": 2.0742549896240234,
      "eval_runtime": 71.0702,
      "eval_samples_per_second": 966.861,
      "eval_steps_per_second": 30.224,
      "step": 34374
    },
    {
      "epoch": 3.0,
      "learning_rate": 1.8800535288299298e-05,
      "loss": 2.0989,
      "step": 51558
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.8547435998916626,
      "eval_runtime": 71.5379,
      "eval_samples_per_second": 960.54,
      "eval_steps_per_second": 30.026,
      "step": 51561
    },
    {
      "epoch": 4.0,
      "learning_rate": 1.8400698202129518e-05,
      "loss": 1.9144,
      "step": 68744
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.7082182168960571,
      "eval_runtime": 71.3486,
      "eval_samples_per_second": 963.089,
      "eval_steps_per_second": 30.106,
      "step": 68748
    },
    {
      "epoch": 5.0,
      "learning_rate": 1.8000861115959738e-05,
      "loss": 1.7926,
      "step": 85930
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.614298701286316,
      "eval_runtime": 72.4304,
      "eval_samples_per_second": 948.703,
      "eval_steps_per_second": 29.656,
      "step": 85935
    },
    {
      "epoch": 6.0,
      "learning_rate": 1.7601047303194276e-05,
      "loss": 1.7014,
      "step": 103116
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.5449920892715454,
      "eval_runtime": 71.3467,
      "eval_samples_per_second": 963.114,
      "eval_steps_per_second": 30.107,
      "step": 103122
    },
    {
      "epoch": 7.0,
      "learning_rate": 1.7201210217024496e-05,
      "loss": 1.6306,
      "step": 120302
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.4825724363327026,
      "eval_runtime": 71.2883,
      "eval_samples_per_second": 963.903,
      "eval_steps_per_second": 30.131,
      "step": 120309
    },
    {
      "epoch": 8.0,
      "learning_rate": 1.6801373130854716e-05,
      "loss": 1.5745,
      "step": 137488
    },
    {
      "epoch": 8.0,
      "eval_loss": 1.4376918077468872,
      "eval_runtime": 72.4942,
      "eval_samples_per_second": 947.869,
      "eval_steps_per_second": 29.63,
      "step": 137496
    },
    {
      "epoch": 9.0,
      "learning_rate": 1.6401559318089254e-05,
      "loss": 1.5262,
      "step": 154674
    },
    {
      "epoch": 9.0,
      "eval_loss": 1.3938175439834595,
      "eval_runtime": 71.2189,
      "eval_samples_per_second": 964.842,
      "eval_steps_per_second": 30.161,
      "step": 154683
    },
    {
      "epoch": 10.0,
      "learning_rate": 1.6001722231919474e-05,
      "loss": 1.4839,
      "step": 171860
    },
    {
      "epoch": 10.0,
      "eval_loss": 1.363573431968689,
      "eval_runtime": 71.173,
      "eval_samples_per_second": 965.465,
      "eval_steps_per_second": 30.18,
      "step": 171870
    },
    {
      "epoch": 11.0,
      "learning_rate": 1.5601908419154012e-05,
      "loss": 1.4505,
      "step": 189046
    },
    {
      "epoch": 11.0,
      "eval_loss": 1.3312129974365234,
      "eval_runtime": 71.256,
      "eval_samples_per_second": 964.339,
      "eval_steps_per_second": 30.145,
      "step": 189057
    },
    {
      "epoch": 12.0,
      "learning_rate": 1.5202071332984234e-05,
      "loss": 1.4187,
      "step": 206232
    },
    {
      "epoch": 12.0,
      "eval_loss": 1.3099381923675537,
      "eval_runtime": 71.1495,
      "eval_samples_per_second": 965.783,
      "eval_steps_per_second": 30.19,
      "step": 206244
    },
    {
      "epoch": 13.0,
      "learning_rate": 1.480225752021877e-05,
      "loss": 1.39,
      "step": 223418
    },
    {
      "epoch": 13.0,
      "eval_loss": 1.2823187112808228,
      "eval_runtime": 69.4163,
      "eval_samples_per_second": 989.898,
      "eval_steps_per_second": 30.944,
      "step": 223431
    },
    {
      "epoch": 14.0,
      "learning_rate": 1.4402420434048992e-05,
      "loss": 1.3651,
      "step": 240604
    },
    {
      "epoch": 14.0,
      "eval_loss": 1.2610409259796143,
      "eval_runtime": 69.462,
      "eval_samples_per_second": 989.246,
      "eval_steps_per_second": 30.923,
      "step": 240618
    },
    {
      "epoch": 15.0,
      "learning_rate": 1.4002583347879212e-05,
      "loss": 1.3436,
      "step": 257790
    },
    {
      "epoch": 15.0,
      "eval_loss": 1.2433568239212036,
      "eval_runtime": 69.4357,
      "eval_samples_per_second": 989.621,
      "eval_steps_per_second": 30.935,
      "step": 257805
    },
    {
      "epoch": 16.0,
      "learning_rate": 1.3602722988305115e-05,
      "loss": 1.3245,
      "step": 274976
    },
    {
      "epoch": 16.0,
      "eval_loss": 1.2258530855178833,
      "eval_runtime": 69.4442,
      "eval_samples_per_second": 989.499,
      "eval_steps_per_second": 30.931,
      "step": 274992
    },
    {
      "epoch": 17.0,
      "learning_rate": 1.3202885902135337e-05,
      "loss": 1.305,
      "step": 292162
    },
    {
      "epoch": 17.0,
      "eval_loss": 1.2092918157577515,
      "eval_runtime": 69.4339,
      "eval_samples_per_second": 989.646,
      "eval_steps_per_second": 30.936,
      "step": 292179
    },
    {
      "epoch": 18.0,
      "learning_rate": 1.2803072089369875e-05,
      "loss": 1.2878,
      "step": 309348
    },
    {
      "epoch": 18.0,
      "eval_loss": 1.1935821771621704,
      "eval_runtime": 69.4288,
      "eval_samples_per_second": 989.72,
      "eval_steps_per_second": 30.938,
      "step": 309366
    },
    {
      "epoch": 19.0,
      "learning_rate": 1.2403304823413045e-05,
      "loss": 1.2716,
      "step": 326534
    },
    {
      "epoch": 19.0,
      "eval_loss": 1.1804686784744263,
      "eval_runtime": 69.4238,
      "eval_samples_per_second": 989.791,
      "eval_steps_per_second": 30.94,
      "step": 326553
    },
    {
      "epoch": 20.0,
      "learning_rate": 1.2003421190434631e-05,
      "loss": 1.2577,
      "step": 343720
    },
    {
      "epoch": 20.0,
      "eval_loss": 1.170041799545288,
      "eval_runtime": 69.4213,
      "eval_samples_per_second": 989.826,
      "eval_steps_per_second": 30.942,
      "step": 343740
    },
    {
      "epoch": 21.0,
      "learning_rate": 1.1603584104264853e-05,
      "loss": 1.2451,
      "step": 360906
    },
    {
      "epoch": 21.0,
      "eval_loss": 1.1530039310455322,
      "eval_runtime": 69.5559,
      "eval_samples_per_second": 987.911,
      "eval_steps_per_second": 30.882,
      "step": 360927
    },
    {
      "epoch": 22.0,
      "learning_rate": 1.1203747018095073e-05,
      "loss": 1.2312,
      "step": 378092
    },
    {
      "epoch": 22.0,
      "eval_loss": 1.1468385457992554,
      "eval_runtime": 70.6551,
      "eval_samples_per_second": 972.542,
      "eval_steps_per_second": 30.401,
      "step": 378114
    },
    {
      "epoch": 23.0,
      "learning_rate": 1.0803933205329611e-05,
      "loss": 1.2189,
      "step": 395278
    },
    {
      "epoch": 23.0,
      "eval_loss": 1.1345593929290771,
      "eval_runtime": 69.4572,
      "eval_samples_per_second": 989.315,
      "eval_steps_per_second": 30.926,
      "step": 395301
    },
    {
      "epoch": 24.0,
      "learning_rate": 1.040411939256415e-05,
      "loss": 1.2081,
      "step": 412464
    },
    {
      "epoch": 24.0,
      "eval_loss": 1.1272401809692383,
      "eval_runtime": 69.4251,
      "eval_samples_per_second": 989.772,
      "eval_steps_per_second": 30.94,
      "step": 412488
    },
    {
      "epoch": 25.0,
      "learning_rate": 1.0004305579798686e-05,
      "loss": 1.1972,
      "step": 429650
    },
    {
      "epoch": 25.0,
      "eval_loss": 1.1170649528503418,
      "eval_runtime": 69.594,
      "eval_samples_per_second": 987.37,
      "eval_steps_per_second": 30.865,
      "step": 429675
    },
    {
      "epoch": 26.0,
      "learning_rate": 9.604491767033224e-06,
      "loss": 1.187,
      "step": 446836
    },
    {
      "epoch": 26.0,
      "eval_loss": 1.1084502935409546,
      "eval_runtime": 69.4386,
      "eval_samples_per_second": 989.579,
      "eval_steps_per_second": 30.934,
      "step": 446862
    },
    {
      "epoch": 27.0,
      "learning_rate": 9.204654680863444e-06,
      "loss": 1.1777,
      "step": 464022
    },
    {
      "epoch": 27.0,
      "eval_loss": 1.100696086883545,
      "eval_runtime": 69.6537,
      "eval_samples_per_second": 986.524,
      "eval_steps_per_second": 30.838,
      "step": 464049
    },
    {
      "epoch": 28.0,
      "learning_rate": 8.804840868097981e-06,
      "loss": 1.1691,
      "step": 481208
    },
    {
      "epoch": 28.0,
      "eval_loss": 1.0973334312438965,
      "eval_runtime": 69.4707,
      "eval_samples_per_second": 989.122,
      "eval_steps_per_second": 30.92,
      "step": 481236
    },
    {
      "epoch": 29.0,
      "learning_rate": 8.40502705533252e-06,
      "loss": 1.1611,
      "step": 498394
    },
    {
      "epoch": 29.0,
      "eval_loss": 1.0852068662643433,
      "eval_runtime": 69.6222,
      "eval_samples_per_second": 986.969,
      "eval_steps_per_second": 30.852,
      "step": 498423
    },
    {
      "epoch": 30.0,
      "learning_rate": 8.005189969162739e-06,
      "loss": 1.1534,
      "step": 515580
    },
    {
      "epoch": 30.0,
      "eval_loss": 1.082985520362854,
      "eval_runtime": 69.4202,
      "eval_samples_per_second": 989.842,
      "eval_steps_per_second": 30.942,
      "step": 515610
    },
    {
      "epoch": 31.0,
      "learning_rate": 7.6053761563972775e-06,
      "loss": 1.1453,
      "step": 532766
    },
    {
      "epoch": 31.0,
      "eval_loss": 1.0722641944885254,
      "eval_runtime": 69.4459,
      "eval_samples_per_second": 989.476,
      "eval_steps_per_second": 30.931,
      "step": 532797
    },
    {
      "epoch": 32.0,
      "learning_rate": 7.205539070227498e-06,
      "loss": 1.1387,
      "step": 549952
    },
    {
      "epoch": 32.0,
      "eval_loss": 1.0714243650436401,
      "eval_runtime": 69.4689,
      "eval_samples_per_second": 989.147,
      "eval_steps_per_second": 30.92,
      "step": 549984
    },
    {
      "epoch": 33.0,
      "learning_rate": 6.805678710653402e-06,
      "loss": 1.1304,
      "step": 567138
    },
    {
      "epoch": 33.0,
      "eval_loss": 1.0586808919906616,
      "eval_runtime": 69.5198,
      "eval_samples_per_second": 988.423,
      "eval_steps_per_second": 30.898,
      "step": 567171
    },
    {
      "epoch": 34.0,
      "learning_rate": 6.405864897887939e-06,
      "loss": 1.1243,
      "step": 584324
    },
    {
      "epoch": 34.0,
      "eval_loss": 1.0512875318527222,
      "eval_runtime": 69.4412,
      "eval_samples_per_second": 989.542,
      "eval_steps_per_second": 30.933,
      "step": 584358
    },
    {
      "epoch": 35.0,
      "learning_rate": 6.006051085122476e-06,
      "loss": 1.1192,
      "step": 601510
    },
    {
      "epoch": 35.0,
      "eval_loss": 1.0537949800491333,
      "eval_runtime": 69.726,
      "eval_samples_per_second": 985.5,
      "eval_steps_per_second": 30.806,
      "step": 601545
    },
    {
      "epoch": 36.0,
      "learning_rate": 5.606213998952698e-06,
      "loss": 1.1126,
      "step": 618696
    },
    {
      "epoch": 36.0,
      "eval_loss": 1.049811840057373,
      "eval_runtime": 69.9084,
      "eval_samples_per_second": 982.929,
      "eval_steps_per_second": 30.726,
      "step": 618732
    },
    {
      "epoch": 37.0,
      "learning_rate": 5.206446732995869e-06,
      "loss": 1.1083,
      "step": 635882
    },
    {
      "epoch": 37.0,
      "eval_loss": 1.0433541536331177,
      "eval_runtime": 69.7631,
      "eval_samples_per_second": 984.977,
      "eval_steps_per_second": 30.79,
      "step": 635919
    },
    {
      "epoch": 38.0,
      "learning_rate": 4.8066096468260895e-06,
      "loss": 1.1037,
      "step": 653068
    },
    {
      "epoch": 38.0,
      "eval_loss": 1.0362184047698975,
      "eval_runtime": 69.5998,
      "eval_samples_per_second": 987.287,
      "eval_steps_per_second": 30.862,
      "step": 653106
    },
    {
      "epoch": 39.0,
      "learning_rate": 4.4067492872519926e-06,
      "loss": 1.0997,
      "step": 670254
    },
    {
      "epoch": 39.0,
      "eval_loss": 1.037041187286377,
      "eval_runtime": 69.6134,
      "eval_samples_per_second": 987.095,
      "eval_steps_per_second": 30.856,
      "step": 670293
    },
    {
      "epoch": 40.0,
      "learning_rate": 4.006935474486531e-06,
      "loss": 1.0952,
      "step": 687440
    },
    {
      "epoch": 40.0,
      "eval_loss": 1.0285437107086182,
      "eval_runtime": 69.7372,
      "eval_samples_per_second": 985.341,
      "eval_steps_per_second": 30.801,
      "step": 687480
    },
    {
      "epoch": 41.0,
      "learning_rate": 3.6071216617210684e-06,
      "loss": 1.0891,
      "step": 704626
    },
    {
      "epoch": 41.0,
      "eval_loss": 1.0275565385818481,
      "eval_runtime": 69.6129,
      "eval_samples_per_second": 987.101,
      "eval_steps_per_second": 30.856,
      "step": 704667
    },
    {
      "epoch": 42.0,
      "learning_rate": 3.207261302146972e-06,
      "loss": 1.086,
      "step": 721812
    },
    {
      "epoch": 42.0,
      "eval_loss": 1.0269238948822021,
      "eval_runtime": 69.6407,
      "eval_samples_per_second": 986.708,
      "eval_steps_per_second": 30.844,
      "step": 721854
    },
    {
      "epoch": 43.0,
      "learning_rate": 2.8074707627858266e-06,
      "loss": 1.0831,
      "step": 738998
    },
    {
      "epoch": 43.0,
      "eval_loss": 1.0213665962219238,
      "eval_runtime": 69.6166,
      "eval_samples_per_second": 987.049,
      "eval_steps_per_second": 30.855,
      "step": 739041
    },
    {
      "epoch": 44.0,
      "learning_rate": 2.4076569500203645e-06,
      "loss": 1.0791,
      "step": 756184
    },
    {
      "epoch": 44.0,
      "eval_loss": 1.020618200302124,
      "eval_runtime": 69.8024,
      "eval_samples_per_second": 984.422,
      "eval_steps_per_second": 30.773,
      "step": 756228
    },
    {
      "epoch": 45.0,
      "learning_rate": 2.007819863850585e-06,
      "loss": 1.077,
      "step": 773370
    },
    {
      "epoch": 45.0,
      "eval_loss": 1.0169862508773804,
      "eval_runtime": 69.6571,
      "eval_samples_per_second": 986.475,
      "eval_steps_per_second": 30.837,
      "step": 773415
    },
    {
      "epoch": 46.0,
      "learning_rate": 1.608052597893757e-06,
      "loss": 1.0739,
      "step": 790556
    },
    {
      "epoch": 46.0,
      "eval_loss": 1.0156688690185547,
      "eval_runtime": 69.7503,
      "eval_samples_per_second": 985.157,
      "eval_steps_per_second": 30.796,
      "step": 790602
    },
    {
      "epoch": 47.0,
      "learning_rate": 1.2081922383196604e-06,
      "loss": 1.0709,
      "step": 807742
    },
    {
      "epoch": 47.0,
      "eval_loss": 1.0143157243728638,
      "eval_runtime": 69.6201,
      "eval_samples_per_second": 986.999,
      "eval_steps_per_second": 30.853,
      "step": 807789
    },
    {
      "epoch": 48.0,
      "learning_rate": 8.084249723628325e-07,
      "loss": 1.0693,
      "step": 824928
    },
    {
      "epoch": 48.0,
      "eval_loss": 1.012279748916626,
      "eval_runtime": 69.6307,
      "eval_samples_per_second": 986.849,
      "eval_steps_per_second": 30.848,
      "step": 824976
    }
  ],
  "max_steps": 859350,
  "num_train_epochs": 50,
  "total_flos": 1.7374699189290516e+18,
  "trial_name": null,
  "trial_params": null
}