|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.6925207756232687, |
|
"eval_steps": 100, |
|
"global_step": 1000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 6.8181818181818174e-06, |
|
"loss": 0.2935, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1.3636363636363635e-05, |
|
"loss": 0.3591, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.045454545454545e-05, |
|
"loss": 0.3512, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2.727272727272727e-05, |
|
"loss": 0.3309, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 3.4090909090909085e-05, |
|
"loss": 0.2684, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.09090909090909e-05, |
|
"loss": 0.2415, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4.7727272727272724e-05, |
|
"loss": 0.3725, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 5.454545454545454e-05, |
|
"loss": 0.2985, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.136363636363636e-05, |
|
"loss": 0.2536, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6.818181818181817e-05, |
|
"loss": 0.2281, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 7.5e-05, |
|
"loss": 0.2784, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.18181818181818e-05, |
|
"loss": 0.3719, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8.863636363636364e-05, |
|
"loss": 0.2899, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 9.545454545454545e-05, |
|
"loss": 0.2462, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00010227272727272726, |
|
"loss": 0.2554, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00010909090909090908, |
|
"loss": 0.332, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001159090909090909, |
|
"loss": 0.2812, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012272727272727272, |
|
"loss": 0.2226, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012954545454545453, |
|
"loss": 0.2564, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00013636363636363634, |
|
"loss": 0.3411, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00014318181818181818, |
|
"loss": 0.3666, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00015, |
|
"loss": 0.243, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001568181818181818, |
|
"loss": 0.239, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001636363636363636, |
|
"loss": 0.2591, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00017045454545454547, |
|
"loss": 0.2167, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00017727272727272728, |
|
"loss": 0.3029, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00018409090909090909, |
|
"loss": 0.3964, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001909090909090909, |
|
"loss": 0.3917, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0001977272727272727, |
|
"loss": 0.2781, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002045454545454545, |
|
"loss": 0.247, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00021136363636363635, |
|
"loss": 0.3181, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00021818181818181816, |
|
"loss": 0.3593, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.000225, |
|
"loss": 0.3164, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002318181818181818, |
|
"loss": 0.3297, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002386363636363636, |
|
"loss": 0.3638, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00024545454545454545, |
|
"loss": 0.298, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002522727272727273, |
|
"loss": 0.3642, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00025909090909090907, |
|
"loss": 0.2459, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002659090909090909, |
|
"loss": 0.4347, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002727272727272727, |
|
"loss": 0.232, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002795454545454545, |
|
"loss": 0.265, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00028636363636363636, |
|
"loss": 0.2941, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029318181818181814, |
|
"loss": 0.3151, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0003, |
|
"loss": 0.3219, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002999996223367247, |
|
"loss": 0.3614, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002999984893488008, |
|
"loss": 0.3071, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029999660104193315, |
|
"loss": 0.3223, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029999395742563066, |
|
"loss": 0.3473, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00029999055851320506, |
|
"loss": 0.301, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.0002999864043217717, |
|
"loss": 0.3275, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002999814948722491, |
|
"loss": 0.3894, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002999758301893587, |
|
"loss": 0.3163, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029996941030162517, |
|
"loss": 0.2869, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000299962235241376, |
|
"loss": 0.3036, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029995430504474105, |
|
"loss": 0.2893, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029994561975165313, |
|
"loss": 0.3469, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029993617940584705, |
|
"loss": 0.2517, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002999259840548597, |
|
"loss": 0.2794, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029991503375003, |
|
"loss": 0.3508, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002999033285464982, |
|
"loss": 0.317, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.000299890868503206, |
|
"loss": 0.313, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029987765368289616, |
|
"loss": 0.3155, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00029986368415211196, |
|
"loss": 0.3203, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0002998489599811972, |
|
"loss": 0.254, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002998334812442955, |
|
"loss": 0.3749, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029981724801935025, |
|
"loss": 0.2615, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029980026038810395, |
|
"loss": 0.3369, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029978251843609815, |
|
"loss": 0.3073, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029976402225267247, |
|
"loss": 0.3998, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002997447719309647, |
|
"loss": 0.3852, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002997247675679101, |
|
"loss": 0.3341, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002997040092642407, |
|
"loss": 0.4505, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029968249712448525, |
|
"loss": 0.3565, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029966023125696837, |
|
"loss": 0.2858, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029963721177381, |
|
"loss": 0.2831, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002996134387909251, |
|
"loss": 0.3214, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0002995889124280226, |
|
"loss": 0.3949, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029956363280860535, |
|
"loss": 0.2967, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00029953760005996916, |
|
"loss": 0.3067, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029951081431320217, |
|
"loss": 0.4727, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029948327570318426, |
|
"loss": 0.3245, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002994549843685864, |
|
"loss": 0.2191, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029942594045187, |
|
"loss": 0.283, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029939614409928584, |
|
"loss": 0.3527, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002993655954608738, |
|
"loss": 0.3712, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.000299334294690462, |
|
"loss": 0.3313, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029930224194566564, |
|
"loss": 0.2698, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029926943738788666, |
|
"loss": 0.2209, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002992358811823128, |
|
"loss": 0.3313, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0002992015734979167, |
|
"loss": 0.3474, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029916651450745505, |
|
"loss": 0.2964, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029913070438746767, |
|
"loss": 0.3489, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00029909414331827697, |
|
"loss": 0.2803, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002990568314839864, |
|
"loss": 0.3481, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029901876907248014, |
|
"loss": 0.2366, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002989799562754219, |
|
"loss": 0.3413, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029894039328825394, |
|
"loss": 0.3224, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000298900080310196, |
|
"loss": 0.3207, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002988590175442446, |
|
"loss": 0.3361, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002988172051971717, |
|
"loss": 0.3555, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"eval_loss": 0.3661580979824066, |
|
"eval_runtime": 430.9338, |
|
"eval_samples_per_second": 2.167, |
|
"eval_steps_per_second": 0.272, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029877464347952365, |
|
"loss": 0.2807, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029873133260562067, |
|
"loss": 0.3233, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029868727279355495, |
|
"loss": 0.3436, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002986424642651902, |
|
"loss": 0.3557, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002985969072461602, |
|
"loss": 0.3424, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029855060196586774, |
|
"loss": 0.3835, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0002985035486574836, |
|
"loss": 0.2908, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00029845574755794517, |
|
"loss": 0.3522, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029840719890795547, |
|
"loss": 0.2667, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002983579029519816, |
|
"loss": 0.3424, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029830785993825395, |
|
"loss": 0.28, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002982570701187646, |
|
"loss": 0.3091, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002982055337492662, |
|
"loss": 0.2888, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029815325108927063, |
|
"loss": 0.3948, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002981002224020478, |
|
"loss": 0.3093, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029804644795462435, |
|
"loss": 0.314, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002979919280177818, |
|
"loss": 0.3129, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0002979366628660561, |
|
"loss": 0.3162, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029788065277773534, |
|
"loss": 0.3045, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.000297823898034859, |
|
"loss": 0.4251, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029776639892321606, |
|
"loss": 0.382, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00029770815573234395, |
|
"loss": 0.3747, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002976491687555269, |
|
"loss": 0.2517, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029758943828979444, |
|
"loss": 0.2771, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029752896463591984, |
|
"loss": 0.331, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002974677480984189, |
|
"loss": 0.3161, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029740578898554816, |
|
"loss": 0.3053, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002973430876093033, |
|
"loss": 0.2365, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002972796442854177, |
|
"loss": 0.2687, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029721545933336104, |
|
"loss": 0.2488, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002971505330763372, |
|
"loss": 0.3202, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.000297084865841283, |
|
"loss": 0.3289, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00029701845795886654, |
|
"loss": 0.3061, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002969513097634853, |
|
"loss": 0.3474, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002968834215932648, |
|
"loss": 0.27, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002968147937900567, |
|
"loss": 0.2733, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0002967454266994368, |
|
"loss": 0.3651, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000296675320670704, |
|
"loss": 0.3448, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029660447605687785, |
|
"loss": 0.2632, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002965328932146971, |
|
"loss": 0.3662, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.000296460572504618, |
|
"loss": 0.3963, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002963875142908121, |
|
"loss": 0.3186, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002963137189411649, |
|
"loss": 0.3109, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002962391868272735, |
|
"loss": 0.4337, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002961639183244453, |
|
"loss": 0.3443, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002960879138116956, |
|
"loss": 0.347, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00029601117367174576, |
|
"loss": 0.2632, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002959336982910217, |
|
"loss": 0.2897, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002958554880596515, |
|
"loss": 0.317, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002957765433714634, |
|
"loss": 0.3383, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0002956968646239845, |
|
"loss": 0.3299, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002956164522184377, |
|
"loss": 0.2869, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029553530655974063, |
|
"loss": 0.2665, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.000295453428056503, |
|
"loss": 0.3833, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002953708171210249, |
|
"loss": 0.2955, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029528747416929463, |
|
"loss": 0.307, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002952033996209864, |
|
"loss": 0.3072, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029511859389945845, |
|
"loss": 0.3135, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002950330574317509, |
|
"loss": 0.2607, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002949467906485836, |
|
"loss": 0.3032, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029485979398435365, |
|
"loss": 0.4594, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002947720678771338, |
|
"loss": 0.3875, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002946836127686697, |
|
"loss": 0.2992, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00029459442910437797, |
|
"loss": 0.3454, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002945045173333439, |
|
"loss": 0.3506, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.0002944138779083191, |
|
"loss": 0.3296, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002943225112857194, |
|
"loss": 0.324, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002942304179256223, |
|
"loss": 0.3531, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002941375982917649, |
|
"loss": 0.2671, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002940440528515414, |
|
"loss": 0.3906, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002939497820760009, |
|
"loss": 0.3454, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.0002938547864398448, |
|
"loss": 0.3434, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029375906642142467, |
|
"loss": 0.2481, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029366262250273955, |
|
"loss": 0.335, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029356545516943385, |
|
"loss": 0.2842, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029346756491079474, |
|
"loss": 0.3256, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029336895221974946, |
|
"loss": 0.3419, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029326961759286343, |
|
"loss": 0.2448, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029316956153033707, |
|
"loss": 0.2276, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00029306878453600377, |
|
"loss": 0.3307, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00029296728711732714, |
|
"loss": 0.2659, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002928650697853984, |
|
"loss": 0.3261, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.000292762133054934, |
|
"loss": 0.3047, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00029265847744427303, |
|
"loss": 0.337, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002925541034753742, |
|
"loss": 0.2641, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002924490116738139, |
|
"loss": 0.3339, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00029234320256878285, |
|
"loss": 0.4126, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002922366766930839, |
|
"loss": 0.3095, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002921294345831293, |
|
"loss": 0.4463, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002920214767789377, |
|
"loss": 0.3103, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002919128038241318, |
|
"loss": 0.3747, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002918034162659353, |
|
"loss": 0.2582, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002916933146551705, |
|
"loss": 0.273, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0002915824995462551, |
|
"loss": 0.3291, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002914709714971998, |
|
"loss": 0.2405, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002913587310696052, |
|
"loss": 0.2769, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029124577882865915, |
|
"loss": 0.2273, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002911321153431338, |
|
"loss": 0.4251, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029101774118538284, |
|
"loss": 0.323, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029090265693133854, |
|
"loss": 0.2576, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"eval_loss": 0.3576963245868683, |
|
"eval_runtime": 410.72, |
|
"eval_samples_per_second": 2.274, |
|
"eval_steps_per_second": 0.285, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029078686316050874, |
|
"loss": 0.3654, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029067036045597435, |
|
"loss": 0.342, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029055314940438576, |
|
"loss": 0.2253, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00029043523059596053, |
|
"loss": 0.2887, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002903166046244801, |
|
"loss": 0.3511, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002901972720872866, |
|
"loss": 0.3639, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002900772335852804, |
|
"loss": 0.3686, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002899564897229166, |
|
"loss": 0.3076, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0002898350411082021, |
|
"loss": 0.3183, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028971288835269275, |
|
"loss": 0.2417, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002895900320714899, |
|
"loss": 0.3401, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028946647288323766, |
|
"loss": 0.4682, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002893422114101197, |
|
"loss": 0.2697, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002892172482778558, |
|
"loss": 0.2754, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002890915841156992, |
|
"loss": 0.2631, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002889652195564331, |
|
"loss": 0.3227, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028883815523636746, |
|
"loss": 0.3394, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028871039179533605, |
|
"loss": 0.2743, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.000288581929876693, |
|
"loss": 0.2247, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0002884527701273096, |
|
"loss": 0.3068, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028832291319757104, |
|
"loss": 0.4094, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028819235974137335, |
|
"loss": 0.3223, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00028806111041611976, |
|
"loss": 0.2296, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002879291658827176, |
|
"loss": 0.2486, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002877965268055749, |
|
"loss": 0.2759, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028766319385259713, |
|
"loss": 0.3165, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002875291676951838, |
|
"loss": 0.3755, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000287394449008225, |
|
"loss": 0.2859, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002872590384700979, |
|
"loss": 0.3506, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.000287122936762664, |
|
"loss": 0.3166, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028698614457126455, |
|
"loss": 0.3258, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028684866258471813, |
|
"loss": 0.2901, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028671049149531675, |
|
"loss": 0.3573, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028657163199882237, |
|
"loss": 0.364, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0002864320847944633, |
|
"loss": 0.2158, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028629185058493114, |
|
"loss": 0.3299, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028615093007637653, |
|
"loss": 0.3535, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00028600932397840616, |
|
"loss": 0.2786, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002858670330040791, |
|
"loss": 0.3313, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028572405786990294, |
|
"loss": 0.3865, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002855803992958304, |
|
"loss": 0.2902, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028543605800525584, |
|
"loss": 0.2914, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028529103472501124, |
|
"loss": 0.3619, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002851453301853628, |
|
"loss": 0.3738, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002849989451200074, |
|
"loss": 0.2853, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028485188026606834, |
|
"loss": 0.2604, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002847041363640923, |
|
"loss": 0.2208, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002845557141580453, |
|
"loss": 0.2787, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002844066143953088, |
|
"loss": 0.2579, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00028425683782667613, |
|
"loss": 0.3502, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002841063852063489, |
|
"loss": 0.2731, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.0002839552572919328, |
|
"loss": 0.3358, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028380345484443393, |
|
"loss": 0.2799, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028365097862825513, |
|
"loss": 0.2887, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028349782941119203, |
|
"loss": 0.3689, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028334400796442896, |
|
"loss": 0.2289, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002831895150625355, |
|
"loss": 0.3759, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002830343514834622, |
|
"loss": 0.2855, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028287851800853697, |
|
"loss": 0.3864, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028272201542246073, |
|
"loss": 0.2584, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028256484451330403, |
|
"loss": 0.3531, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028240700607250245, |
|
"loss": 0.2778, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002822485008948532, |
|
"loss": 0.3646, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028208932977851067, |
|
"loss": 0.3598, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002819294935249827, |
|
"loss": 0.3691, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00028176899293912643, |
|
"loss": 0.2941, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0002816078288291442, |
|
"loss": 0.3349, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002814460020065795, |
|
"loss": 0.3111, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028128351328631304, |
|
"loss": 0.3575, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002811203634865584, |
|
"loss": 0.2307, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028095655342885804, |
|
"loss": 0.2248, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028079208393807934, |
|
"loss": 0.301, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028062695584241, |
|
"loss": 0.2811, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002804611699733543, |
|
"loss": 0.2851, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00028029472716572867, |
|
"loss": 0.3758, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002801276282576576, |
|
"loss": 0.2958, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002799598740905694, |
|
"loss": 0.285, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002797914655091919, |
|
"loss": 0.3346, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002796224033615482, |
|
"loss": 0.3573, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0002794526884989525, |
|
"loss": 0.2312, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00027928232177600585, |
|
"loss": 0.3695, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002791113040505915, |
|
"loss": 0.3503, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002789396361838711, |
|
"loss": 0.359, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002787673190402799, |
|
"loss": 0.3637, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027859435348752263, |
|
"loss": 0.3154, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027842074039656923, |
|
"loss": 0.3313, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002782464806416501, |
|
"loss": 0.2544, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027807157510025205, |
|
"loss": 0.3054, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002778960246531138, |
|
"loss": 0.423, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027771983018422145, |
|
"loss": 0.2778, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027754299258080393, |
|
"loss": 0.3546, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.000277365512733329, |
|
"loss": 0.3297, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027718739153549817, |
|
"loss": 0.2973, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002770086298842426, |
|
"loss": 0.3164, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0002768292286797184, |
|
"loss": 0.241, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00027664918882530225, |
|
"loss": 0.3432, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002764685112275866, |
|
"loss": 0.3224, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002762871967963755, |
|
"loss": 0.2248, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002761052464446795, |
|
"loss": 0.2908, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00027592266108871156, |
|
"loss": 0.3887, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"eval_loss": 0.3512147068977356, |
|
"eval_runtime": 416.8184, |
|
"eval_samples_per_second": 2.241, |
|
"eval_steps_per_second": 0.281, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002757394416478821, |
|
"loss": 0.2728, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002755555890447945, |
|
"loss": 0.3177, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002753711042052405, |
|
"loss": 0.3394, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002751859880581954, |
|
"loss": 0.2704, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002750002415358134, |
|
"loss": 0.2746, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00027481386557342305, |
|
"loss": 0.3615, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002746268611095225, |
|
"loss": 0.2759, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0002744392290857747, |
|
"loss": 0.3264, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00027425097044700246, |
|
"loss": 0.2578, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00027406208614118424, |
|
"loss": 0.2866, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002738725771194489, |
|
"loss": 0.3237, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002736824443360711, |
|
"loss": 0.4085, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027349168874846636, |
|
"loss": 0.2564, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027330031131718647, |
|
"loss": 0.345, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027310831300591447, |
|
"loss": 0.3996, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002729156947814598, |
|
"loss": 0.2875, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002727224576137535, |
|
"loss": 0.3282, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027252860247584343, |
|
"loss": 0.3408, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.000272334130343889, |
|
"loss": 0.2905, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027213904219715676, |
|
"loss": 0.3149, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027194333901801507, |
|
"loss": 0.3763, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002717470217919293, |
|
"loss": 0.2918, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00027155009150745674, |
|
"loss": 0.364, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.0002713525491562421, |
|
"loss": 0.2722, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002711543957330118, |
|
"loss": 0.3424, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002709556322355694, |
|
"loss": 0.3069, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027075625966479074, |
|
"loss": 0.2508, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002705562790246183, |
|
"loss": 0.2991, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002703556913220567, |
|
"loss": 0.4025, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00027015449756716736, |
|
"loss": 0.2895, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026995269877306356, |
|
"loss": 0.3384, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002697502959559052, |
|
"loss": 0.3113, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026954729013489373, |
|
"loss": 0.3183, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026934368233226714, |
|
"loss": 0.3079, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026913947357329457, |
|
"loss": 0.2294, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002689346648862713, |
|
"loss": 0.3304, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0002687292573025136, |
|
"loss": 0.2668, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026852325185635354, |
|
"loss": 0.367, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00026831664958513367, |
|
"loss": 0.2825, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026810945152920177, |
|
"loss": 0.2546, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026790165873190585, |
|
"loss": 0.3059, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002676932722395888, |
|
"loss": 0.2582, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026748429310158273, |
|
"loss": 0.2572, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026727472237020447, |
|
"loss": 0.2955, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026706456110074943, |
|
"loss": 0.2978, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.000266853810351487, |
|
"loss": 0.3181, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026664247118365463, |
|
"loss": 0.2878, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026643054466145294, |
|
"loss": 0.3152, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026621803185204013, |
|
"loss": 0.3325, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026600493382552664, |
|
"loss": 0.2938, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026579125165496984, |
|
"loss": 0.2782, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00026557698641636835, |
|
"loss": 0.2884, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0002653621391886571, |
|
"loss": 0.3014, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026514671105370166, |
|
"loss": 0.3679, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002649307030962924, |
|
"loss": 0.2563, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026471411640413973, |
|
"loss": 0.3266, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002644969520678682, |
|
"loss": 0.293, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026427921118101096, |
|
"loss": 0.3993, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026406089484000466, |
|
"loss": 0.2686, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002638420041441833, |
|
"loss": 0.284, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002636225401957734, |
|
"loss": 0.256, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002634025040998879, |
|
"loss": 0.2845, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026318189696452075, |
|
"loss": 0.3791, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026296071990054165, |
|
"loss": 0.3166, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002627389740216898, |
|
"loss": 0.2813, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.0002625166604445689, |
|
"loss": 0.3218, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026229378028864135, |
|
"loss": 0.3286, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00026207033467622243, |
|
"loss": 0.2546, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002618463247324748, |
|
"loss": 0.252, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00026162175158540293, |
|
"loss": 0.3465, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00026139661636584714, |
|
"loss": 0.3192, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002611709202074782, |
|
"loss": 0.2571, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00026094466424679146, |
|
"loss": 0.3023, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002607178496231011, |
|
"loss": 0.2731, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002604904774785345, |
|
"loss": 0.2513, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002602625489580265, |
|
"loss": 0.3269, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002600340652093135, |
|
"loss": 0.2539, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002598050273829278, |
|
"loss": 0.3323, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002595754366321916, |
|
"loss": 0.2298, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002593452941132117, |
|
"loss": 0.2867, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00025911460098487305, |
|
"loss": 0.3437, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0002588833584088333, |
|
"loss": 0.339, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002586515675495167, |
|
"loss": 0.3775, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002584192295741087, |
|
"loss": 0.3152, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025818634565254954, |
|
"loss": 0.271, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025795291695752856, |
|
"loss": 0.2948, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002577189446644783, |
|
"loss": 0.2434, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002574844299515688, |
|
"loss": 0.3804, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002572493739997013, |
|
"loss": 0.3665, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002570137779925024, |
|
"loss": 0.2785, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025677764311631825, |
|
"loss": 0.2946, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002565409705602085, |
|
"loss": 0.3341, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002563037615159403, |
|
"loss": 0.219, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025606601717798207, |
|
"loss": 0.3707, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025582773874349804, |
|
"loss": 0.2568, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.0002555889274123417, |
|
"loss": 0.2648, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00025534958438704997, |
|
"loss": 0.3418, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00025510971087283705, |
|
"loss": 0.1983, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002548693080775886, |
|
"loss": 0.3219, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00025462837721185535, |
|
"loss": 0.3084, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"eval_loss": 0.33674970269203186, |
|
"eval_runtime": 416.1091, |
|
"eval_samples_per_second": 2.245, |
|
"eval_steps_per_second": 0.281, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002543869194888471, |
|
"loss": 0.3648, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002541449361244268, |
|
"loss": 0.3832, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002539024283371042, |
|
"loss": 0.3058, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002536593973480297, |
|
"loss": 0.2936, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00025341584438098836, |
|
"loss": 0.3428, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00025317177066239366, |
|
"loss": 0.253, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002529271774212814, |
|
"loss": 0.2826, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002526820658893033, |
|
"loss": 0.2316, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.000252436437300721, |
|
"loss": 0.2631, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0002521902928923999, |
|
"loss": 0.2844, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00025194363390380263, |
|
"loss": 0.2828, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002516964615769831, |
|
"loss": 0.2879, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002514487771565801, |
|
"loss": 0.2843, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00025120058188981116, |
|
"loss": 0.2321, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00025095187702646595, |
|
"loss": 0.3335, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00025070266381890056, |
|
"loss": 0.2888, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002504529435220306, |
|
"loss": 0.3596, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002502027173933252, |
|
"loss": 0.2776, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002499519866928006, |
|
"loss": 0.2877, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00024970075268301384, |
|
"loss": 0.3577, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002494490166290564, |
|
"loss": 0.2311, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00024919677979854776, |
|
"loss": 0.2853, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002489440434616291, |
|
"loss": 0.3409, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002486908088909569, |
|
"loss": 0.334, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0002484370773616964, |
|
"loss": 0.2979, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024818285015151527, |
|
"loss": 0.2281, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002479281285405774, |
|
"loss": 0.3019, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000247672913811536, |
|
"loss": 0.2755, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002474172072495275, |
|
"loss": 0.2195, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024716101014216497, |
|
"loss": 0.308, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024690432377953155, |
|
"loss": 0.3277, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024664714945417415, |
|
"loss": 0.2556, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024638948846109656, |
|
"loss": 0.295, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002461313420977536, |
|
"loss": 0.3255, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002458727116640438, |
|
"loss": 0.3477, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002456135984623034, |
|
"loss": 0.3645, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002453540037972997, |
|
"loss": 0.267, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0002450939289762242, |
|
"loss": 0.2784, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00024483337530868644, |
|
"loss": 0.2993, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.000244572344106707, |
|
"loss": 0.2219, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024431083668471134, |
|
"loss": 0.3632, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024404885435952265, |
|
"loss": 0.3313, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024378639845035579, |
|
"loss": 0.3702, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024352347027881003, |
|
"loss": 0.2512, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024326007116886296, |
|
"loss": 0.3034, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024299620244686357, |
|
"loss": 0.2334, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0002427318654415255, |
|
"loss": 0.3378, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024246706148392045, |
|
"loss": 0.325, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024220179190747156, |
|
"loss": 0.3234, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024193605804794646, |
|
"loss": 0.3642, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024166986124345076, |
|
"loss": 0.3163, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024140320283442122, |
|
"loss": 0.3386, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024113608416361898, |
|
"loss": 0.2377, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00024086850657612297, |
|
"loss": 0.2411, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00024060047141932276, |
|
"loss": 0.2512, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00024033198004291232, |
|
"loss": 0.2785, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002400630337988826, |
|
"loss": 0.3279, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023979363404151525, |
|
"loss": 0.2682, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023952378212737554, |
|
"loss": 0.2706, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023925347941530556, |
|
"loss": 0.1878, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023898272726641741, |
|
"loss": 0.3695, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002387115270440863, |
|
"loss": 0.3103, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.0002384398801139439, |
|
"loss": 0.3707, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023816778784387094, |
|
"loss": 0.2876, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023789525160399096, |
|
"loss": 0.2444, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000237622272766663, |
|
"loss": 0.2575, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023734885270647484, |
|
"loss": 0.2286, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.000237074992800236, |
|
"loss": 0.2691, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00023680069442697088, |
|
"loss": 0.2277, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002365259589679117, |
|
"loss": 0.3302, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023625078780649178, |
|
"loss": 0.2591, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023597518232833833, |
|
"loss": 0.2451, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023569914392126558, |
|
"loss": 0.3542, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023542267397526776, |
|
"loss": 0.2733, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023514577388251224, |
|
"loss": 0.3362, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023486844503733216, |
|
"loss": 0.356, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023459068883621985, |
|
"loss": 0.2722, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023431250667781958, |
|
"loss": 0.303, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002340338999629204, |
|
"loss": 0.2613, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023375487009444934, |
|
"loss": 0.3344, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023347541847746424, |
|
"loss": 0.2513, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0002331955465191465, |
|
"loss": 0.3072, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00023291525562879435, |
|
"loss": 0.3652, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023263454721781537, |
|
"loss": 0.3671, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023235342269971974, |
|
"loss": 0.2753, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023207188349011286, |
|
"loss": 0.2894, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023178993100668837, |
|
"loss": 0.2629, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023150756666922087, |
|
"loss": 0.283, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023122479189955885, |
|
"loss": 0.1945, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023094160812161765, |
|
"loss": 0.2772, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023065801676137217, |
|
"loss": 0.2786, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00023037401924684946, |
|
"loss": 0.3114, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.0002300896170081221, |
|
"loss": 0.2696, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00022980481147730046, |
|
"loss": 0.2742, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00022951960408852575, |
|
"loss": 0.3294, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00022923399627796265, |
|
"loss": 0.2375, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00022894798948379228, |
|
"loss": 0.2628, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00022866158514620483, |
|
"loss": 0.3652, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002283747847073923, |
|
"loss": 0.3074, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022808758961154129, |
|
"loss": 0.2767, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"eval_loss": 0.32663506269454956, |
|
"eval_runtime": 430.4483, |
|
"eval_samples_per_second": 2.17, |
|
"eval_steps_per_second": 0.272, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022780000130482559, |
|
"loss": 0.3318, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022751202123539915, |
|
"loss": 0.245, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022722365085338858, |
|
"loss": 0.288, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002269348916108859, |
|
"loss": 0.2872, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0002266457449619413, |
|
"loss": 0.2428, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022635621236255567, |
|
"loss": 0.2677, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022606629527067344, |
|
"loss": 0.318, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022577599514617515, |
|
"loss": 0.3151, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022548531345087, |
|
"loss": 0.2295, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022519425164848874, |
|
"loss": 0.3515, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022490281120467605, |
|
"loss": 0.3882, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00022461099358698328, |
|
"loss": 0.2507, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022431880026486112, |
|
"loss": 0.3211, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000224026232709652, |
|
"loss": 0.3136, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022373329239458296, |
|
"loss": 0.2906, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022343998079475786, |
|
"loss": 0.3354, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022314629938715037, |
|
"loss": 0.2776, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022285224965059623, |
|
"loss": 0.3228, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022255783306578594, |
|
"loss": 0.2954, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022226305111525726, |
|
"loss": 0.3041, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022196790528338777, |
|
"loss": 0.2367, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022167239705638732, |
|
"loss": 0.3199, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022137652792229062, |
|
"loss": 0.2585, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.0002210802993709498, |
|
"loss": 0.2257, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022078371289402677, |
|
"loss": 0.2149, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00022048676998498578, |
|
"loss": 0.282, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.000220189472139086, |
|
"loss": 0.2475, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002198918208533737, |
|
"loss": 0.2864, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002195938176266751, |
|
"loss": 0.2675, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021929546395958846, |
|
"loss": 0.2711, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021899676135447675, |
|
"loss": 0.296, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021869771131546013, |
|
"loss": 0.3615, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021839831534840802, |
|
"loss": 0.2738, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021809857496093199, |
|
"loss": 0.3909, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021779849166237786, |
|
"loss": 0.2874, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021749806696381814, |
|
"loss": 0.201, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021719730237804456, |
|
"loss": 0.2585, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021689619941956024, |
|
"loss": 0.3246, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0002165947596045723, |
|
"loss": 0.2626, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.000216292984450984, |
|
"loss": 0.308, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00021599087547838727, |
|
"loss": 0.2015, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021568843420805494, |
|
"loss": 0.3287, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021538566216293317, |
|
"loss": 0.3935, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021508256086763368, |
|
"loss": 0.2501, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021477913184842629, |
|
"loss": 0.2809, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0002144753766332308, |
|
"loss": 0.3167, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021417129675160995, |
|
"loss": 0.247, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021386689373476087, |
|
"loss": 0.2614, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021356216911550829, |
|
"loss": 0.2917, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021325712442829613, |
|
"loss": 0.2902, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021295176120918001, |
|
"loss": 0.3168, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021264608099581963, |
|
"loss": 0.2715, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021234008532747094, |
|
"loss": 0.2379, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021203377574497808, |
|
"loss": 0.2701, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00021172715379076632, |
|
"loss": 0.315, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00021142022100883358, |
|
"loss": 0.2802, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00021111297894474307, |
|
"loss": 0.3304, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002108054291456154, |
|
"loss": 0.2461, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00021049757316012077, |
|
"loss": 0.2741, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00021018941253847113, |
|
"loss": 0.3782, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020988094883241253, |
|
"loss": 0.2922, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020957218359521706, |
|
"loss": 0.2909, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002092631183816754, |
|
"loss": 0.2427, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020895375474808852, |
|
"loss": 0.3164, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020864409425226023, |
|
"loss": 0.2946, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020833413845348922, |
|
"loss": 0.2347, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.000208023888912561, |
|
"loss": 0.2967, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020771334719174039, |
|
"loss": 0.3025, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00020740251485476345, |
|
"loss": 0.2864, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0002070913934668296, |
|
"loss": 0.2973, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020677998459459385, |
|
"loss": 0.3408, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020646828980615877, |
|
"loss": 0.3269, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002061563106710667, |
|
"loss": 0.2433, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020584404876029176, |
|
"loss": 0.3203, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002055315056462321, |
|
"loss": 0.3005, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002052186829027017, |
|
"loss": 0.2235, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020490558210492277, |
|
"loss": 0.322, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020459220482951765, |
|
"loss": 0.3228, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020427855265450081, |
|
"loss": 0.2743, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020396462715927105, |
|
"loss": 0.2584, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002036504299246034, |
|
"loss": 0.3438, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020333596253264125, |
|
"loss": 0.2986, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00020302122656688858, |
|
"loss": 0.2532, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0002027062236122014, |
|
"loss": 0.3165, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002023909552547804, |
|
"loss": 0.3445, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00020207542308216277, |
|
"loss": 0.223, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002017596286832139, |
|
"loss": 0.3348, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00020144357364811972, |
|
"loss": 0.2205, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00020112725956837873, |
|
"loss": 0.3823, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00020081068803679371, |
|
"loss": 0.3206, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00020049386064746383, |
|
"loss": 0.3496, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0002001767789957768, |
|
"loss": 0.3115, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019985944467840045, |
|
"loss": 0.3348, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019954185929327506, |
|
"loss": 0.2746, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019922402443960509, |
|
"loss": 0.3618, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019890594171785125, |
|
"loss": 0.2744, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001985876127297224, |
|
"loss": 0.2409, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019826903907816743, |
|
"loss": 0.2236, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.0001979502223673672, |
|
"loss": 0.1992, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019763116420272664, |
|
"loss": 0.2262, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"eval_loss": 0.3170485198497772, |
|
"eval_runtime": 435.7355, |
|
"eval_samples_per_second": 2.144, |
|
"eval_steps_per_second": 0.269, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001973118661908664, |
|
"loss": 0.2505, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019699232993961488, |
|
"loss": 0.2638, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019667255705800024, |
|
"loss": 0.3205, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001963525491562421, |
|
"loss": 0.2979, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019603230784574356, |
|
"loss": 0.2554, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019571183473908308, |
|
"loss": 0.2829, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019539113145000623, |
|
"loss": 0.2975, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019507019959341776, |
|
"loss": 0.3208, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001947490407853734, |
|
"loss": 0.2956, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019442765664307165, |
|
"loss": 0.2717, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019410604878484556, |
|
"loss": 0.2846, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019378421883015502, |
|
"loss": 0.2934, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019346216839957788, |
|
"loss": 0.3237, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019313989911480257, |
|
"loss": 0.3288, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019281741259861938, |
|
"loss": 0.3011, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019249471047491239, |
|
"loss": 0.3063, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001921717943686516, |
|
"loss": 0.2326, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019184866590588439, |
|
"loss": 0.3847, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019152532671372737, |
|
"loss": 0.2826, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019120177842035853, |
|
"loss": 0.3045, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019087802265500852, |
|
"loss": 0.2749, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001905540610479529, |
|
"loss": 0.2744, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019022989523050373, |
|
"loss": 0.1984, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018990552683500125, |
|
"loss": 0.2372, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018958095749480592, |
|
"loss": 0.2468, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018925618884429, |
|
"loss": 0.2496, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018893122251882936, |
|
"loss": 0.2924, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00018860606015479536, |
|
"loss": 0.2795, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018828070338954638, |
|
"loss": 0.2819, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018795515386141974, |
|
"loss": 0.2744, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001876294132097236, |
|
"loss": 0.2761, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018730348307472824, |
|
"loss": 0.3291, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018697736509765822, |
|
"loss": 0.2946, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018665106092068408, |
|
"loss": 0.2336, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018632457218691384, |
|
"loss": 0.2842, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018599790054038486, |
|
"loss": 0.3071, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018567104762605562, |
|
"loss": 0.3817, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001853440150897974, |
|
"loss": 0.4117, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001850168045783858, |
|
"loss": 0.2398, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018468941773949282, |
|
"loss": 0.2952, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018436185622167824, |
|
"loss": 0.3224, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00018403412167438147, |
|
"loss": 0.2629, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018370621574791322, |
|
"loss": 0.3025, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018337814009344714, |
|
"loss": 0.3066, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018304989636301166, |
|
"loss": 0.1789, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001827214862094814, |
|
"loss": 0.3176, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018239291128656907, |
|
"loss": 0.2573, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018206417324881716, |
|
"loss": 0.3049, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001817352737515894, |
|
"loss": 0.3125, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018140621445106263, |
|
"loss": 0.1662, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018107699700421845, |
|
"loss": 0.2741, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018074762306883457, |
|
"loss": 0.2982, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018041809430347684, |
|
"loss": 0.3202, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00018008841236749092, |
|
"loss": 0.2496, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00017975857892099347, |
|
"loss": 0.2719, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00017942859562486427, |
|
"loss": 0.307, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00017909846414073763, |
|
"loss": 0.3083, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.000178768186130994, |
|
"loss": 0.3173, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017843776325875176, |
|
"loss": 0.2997, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001781071971878587, |
|
"loss": 0.2361, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017777648958288363, |
|
"loss": 0.3227, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001774456421091082, |
|
"loss": 0.2766, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017711465643251823, |
|
"loss": 0.2536, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017678353421979548, |
|
"loss": 0.2971, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017645227713830936, |
|
"loss": 0.2555, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001761208868561083, |
|
"loss": 0.3386, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001757893650419114, |
|
"loss": 0.2256, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017545771336510032, |
|
"loss": 0.2979, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017512593349571043, |
|
"loss": 0.2183, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017479402710442274, |
|
"loss": 0.2082, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00017446199586255528, |
|
"loss": 0.2664, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017412984144205476, |
|
"loss": 0.2694, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017379756551548825, |
|
"loss": 0.2347, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017346516975603462, |
|
"loss": 0.3269, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017313265583747615, |
|
"loss": 0.2471, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001728000254341901, |
|
"loss": 0.3183, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017246728022114025, |
|
"loss": 0.2714, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017213442187386863, |
|
"loss": 0.2985, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001718014520684869, |
|
"loss": 0.2923, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001714683724816679, |
|
"loss": 0.206, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017113518479063738, |
|
"loss": 0.2784, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001708018906731654, |
|
"loss": 0.3425, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017046849180755793, |
|
"loss": 0.229, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00017013498987264832, |
|
"loss": 0.2058, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001698013865477891, |
|
"loss": 0.3237, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016946768351284327, |
|
"loss": 0.258, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016913388244817586, |
|
"loss": 0.2244, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016879998503464561, |
|
"loss": 0.2383, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016846599295359643, |
|
"loss": 0.2196, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016813190788684884, |
|
"loss": 0.3826, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016779773151669161, |
|
"loss": 0.3966, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001674634655258734, |
|
"loss": 0.2805, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016712911159759406, |
|
"loss": 0.2495, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016679467141549617, |
|
"loss": 0.2076, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016646014666365675, |
|
"loss": 0.3564, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016612553902657876, |
|
"loss": 0.2004, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016579085018918224, |
|
"loss": 0.2654, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016545608183679637, |
|
"loss": 0.2892, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016512123565515065, |
|
"loss": 0.3076, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00016478631333036653, |
|
"loss": 0.2703, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"eval_loss": 0.3060886263847351, |
|
"eval_runtime": 432.8445, |
|
"eval_samples_per_second": 2.158, |
|
"eval_steps_per_second": 0.27, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016445131654894876, |
|
"loss": 0.3737, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016411624699777717, |
|
"loss": 0.196, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016378110636409784, |
|
"loss": 0.2705, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016344589633551502, |
|
"loss": 0.28, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016311061859998222, |
|
"loss": 0.3013, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001627752748457939, |
|
"loss": 0.3199, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016243986676157704, |
|
"loss": 0.2383, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001621043960362826, |
|
"loss": 0.2129, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016176886435917675, |
|
"loss": 0.208, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016143327341983275, |
|
"loss": 0.2469, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016109762490812228, |
|
"loss": 0.2663, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016076192051420687, |
|
"loss": 0.3564, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001604261619285295, |
|
"loss": 0.2445, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00016009035084180596, |
|
"loss": 0.2528, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015975448894501643, |
|
"loss": 0.2414, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.000159418577929397, |
|
"loss": 0.382, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015908261948643107, |
|
"loss": 0.2513, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001587466153078408, |
|
"loss": 0.2635, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015841056708557874, |
|
"loss": 0.2655, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015807447651181922, |
|
"loss": 0.274, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001577383452789497, |
|
"loss": 0.22, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015740217507956267, |
|
"loss": 0.309, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015706596760644637, |
|
"loss": 0.3358, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015672972455257723, |
|
"loss": 0.2, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015639344761111054, |
|
"loss": 0.2409, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015605713847537226, |
|
"loss": 0.239, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015572079883885063, |
|
"loss": 0.3201, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00015538443039518735, |
|
"loss": 0.3946, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001550480348381691, |
|
"loss": 0.3094, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015471161386171922, |
|
"loss": 0.2782, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001543751691598891, |
|
"loss": 0.282, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001540387024268494, |
|
"loss": 0.2884, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015370221535688193, |
|
"loss": 0.3097, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015336570964437075, |
|
"loss": 0.2159, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015302918698379387, |
|
"loss": 0.2569, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015269264906971457, |
|
"loss": 0.2641, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001523560975967731, |
|
"loss": 0.3836, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001520195342596778, |
|
"loss": 0.2721, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015168296075319688, |
|
"loss": 0.302, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015134637877214967, |
|
"loss": 0.3011, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015100979001139828, |
|
"loss": 0.2276, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00015067319616583885, |
|
"loss": 0.3098, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.0001503365989303931, |
|
"loss": 0.1844, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00015, |
|
"loss": 0.1585, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001496634010696069, |
|
"loss": 0.2755, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014932680383416118, |
|
"loss": 0.2363, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014899020998860175, |
|
"loss": 0.2918, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001486536212278503, |
|
"loss": 0.2651, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001483170392468031, |
|
"loss": 0.2688, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014798046574032215, |
|
"loss": 0.2422, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014764390240322691, |
|
"loss": 0.2634, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014730735093028543, |
|
"loss": 0.2553, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014697081301620615, |
|
"loss": 0.2172, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014663429035562925, |
|
"loss": 0.3092, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014629778464311804, |
|
"loss": 0.2259, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001459612975731506, |
|
"loss": 0.331, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014562483084011092, |
|
"loss": 0.2816, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00014528838613828076, |
|
"loss": 0.2568, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014495196516183095, |
|
"loss": 0.2572, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014461556960481265, |
|
"loss": 0.3297, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014427920116114935, |
|
"loss": 0.3141, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001439428615246277, |
|
"loss": 0.3239, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014360655238888947, |
|
"loss": 0.3344, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001432702754474228, |
|
"loss": 0.2538, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001429340323935536, |
|
"loss": 0.2107, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014259782492043738, |
|
"loss": 0.3295, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014226165472105021, |
|
"loss": 0.2634, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014192552348818078, |
|
"loss": 0.2797, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001415894329144212, |
|
"loss": 0.149, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.0001412533846921592, |
|
"loss": 0.2558, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00014091738051356896, |
|
"loss": 0.3599, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.000140581422070603, |
|
"loss": 0.2474, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001402455110549836, |
|
"loss": 0.2546, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013990964915819404, |
|
"loss": 0.238, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013957383807147053, |
|
"loss": 0.3249, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013923807948579308, |
|
"loss": 0.2205, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001389023750918777, |
|
"loss": 0.2654, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013856672658016728, |
|
"loss": 0.2748, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013823113564082325, |
|
"loss": 0.2586, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001378956039637174, |
|
"loss": 0.2715, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013756013323842288, |
|
"loss": 0.1985, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001372247251542061, |
|
"loss": 0.2243, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001368893814000178, |
|
"loss": 0.2082, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013655410366448498, |
|
"loss": 0.3412, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013621889363590214, |
|
"loss": 0.2684, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00013588375300222283, |
|
"loss": 0.3061, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013554868345105124, |
|
"loss": 0.2027, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013521368666963345, |
|
"loss": 0.2806, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013487876434484933, |
|
"loss": 0.2321, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013454391816320366, |
|
"loss": 0.2878, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013420914981081774, |
|
"loss": 0.221, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013387446097342127, |
|
"loss": 0.2298, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001335398533363432, |
|
"loss": 0.2317, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.0001332053285845038, |
|
"loss": 0.3981, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013287088840240591, |
|
"loss": 0.3128, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013253653447412657, |
|
"loss": 0.3695, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013220226848330839, |
|
"loss": 0.188, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013186809211315116, |
|
"loss": 0.3359, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013153400704640357, |
|
"loss": 0.2317, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013120001496535433, |
|
"loss": 0.2897, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"eval_loss": 0.2967790365219116, |
|
"eval_runtime": 413.6599, |
|
"eval_samples_per_second": 2.258, |
|
"eval_steps_per_second": 0.283, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00013086611755182414, |
|
"loss": 0.2827, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001305323164871567, |
|
"loss": 0.271, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00013019861345221087, |
|
"loss": 0.2886, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001298650101273517, |
|
"loss": 0.2194, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012953150819244207, |
|
"loss": 0.2513, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012919810932683463, |
|
"loss": 0.2318, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001288648152093626, |
|
"loss": 0.2832, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012853162751833206, |
|
"loss": 0.2341, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012819854793151312, |
|
"loss": 0.256, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012786557812613137, |
|
"loss": 0.3107, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012753271977885975, |
|
"loss": 0.1994, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001271999745658099, |
|
"loss": 0.3033, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012686734416252385, |
|
"loss": 0.2753, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012653483024396533, |
|
"loss": 0.285, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00012620243448451175, |
|
"loss": 0.2635, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012587015855794524, |
|
"loss": 0.29, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012553800413744472, |
|
"loss": 0.2186, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012520597289557728, |
|
"loss": 0.2064, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012487406650428954, |
|
"loss": 0.2735, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012454228663489965, |
|
"loss": 0.2375, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012421063495808853, |
|
"loss": 0.2282, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001238791131438917, |
|
"loss": 0.3249, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012354772286169065, |
|
"loss": 0.2508, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001232164657802045, |
|
"loss": 0.2783, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001228853435674818, |
|
"loss": 0.2948, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001225543578908918, |
|
"loss": 0.2618, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012222351041711637, |
|
"loss": 0.215, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012189280281214126, |
|
"loss": 0.2082, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00012156223674124823, |
|
"loss": 0.2574, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.000121231813869006, |
|
"loss": 0.2574, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012090153585926237, |
|
"loss": 0.2667, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012057140437513575, |
|
"loss": 0.2521, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00012024142107900652, |
|
"loss": 0.241, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011991158763250908, |
|
"loss": 0.2791, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011958190569652316, |
|
"loss": 0.243, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011925237693116544, |
|
"loss": 0.2643, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011892300299578159, |
|
"loss": 0.2315, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011859378554893731, |
|
"loss": 0.2066, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011826472624841058, |
|
"loss": 0.1797, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.0001179358267511828, |
|
"loss": 0.281, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011760708871343093, |
|
"loss": 0.2792, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011727851379051865, |
|
"loss": 0.2355, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011695010363698834, |
|
"loss": 0.2957, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00011662185990655284, |
|
"loss": 0.4294, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011629378425208677, |
|
"loss": 0.3326, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011596587832561854, |
|
"loss": 0.2172, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011563814377832175, |
|
"loss": 0.3538, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011531058226050718, |
|
"loss": 0.2216, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011498319542161423, |
|
"loss": 0.2975, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011465598491020261, |
|
"loss": 0.2169, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011432895237394435, |
|
"loss": 0.3459, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001140020994596151, |
|
"loss": 0.2275, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011367542781308613, |
|
"loss": 0.278, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011334893907931586, |
|
"loss": 0.2121, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011302263490234176, |
|
"loss": 0.2383, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001126965169252718, |
|
"loss": 0.2471, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011237058679027641, |
|
"loss": 0.2681, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00011204484613858025, |
|
"loss": 0.2053, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.0001117192966104536, |
|
"loss": 0.2396, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011139393984520465, |
|
"loss": 0.2365, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011106877748117063, |
|
"loss": 0.1745, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011074381115570998, |
|
"loss": 0.253, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.0001104190425051941, |
|
"loss": 0.2984, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00011009447316499873, |
|
"loss": 0.2862, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010977010476949628, |
|
"loss": 0.2951, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010944593895204705, |
|
"loss": 0.2645, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010912197734499144, |
|
"loss": 0.285, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010879822157964149, |
|
"loss": 0.2188, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010847467328627259, |
|
"loss": 0.2755, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010815133409411562, |
|
"loss": 0.2713, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010782820563134835, |
|
"loss": 0.2568, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010750528952508762, |
|
"loss": 0.2412, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00010718258740138059, |
|
"loss": 0.2863, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001068601008851974, |
|
"loss": 0.2702, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001065378316004221, |
|
"loss": 0.1762, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.000106215781169845, |
|
"loss": 0.3286, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010589395121515441, |
|
"loss": 0.1931, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010557234335692835, |
|
"loss": 0.16, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010525095921462659, |
|
"loss": 0.2984, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001049298004065822, |
|
"loss": 0.2614, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010460886854999375, |
|
"loss": 0.3418, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010428816526091697, |
|
"loss": 0.2817, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010396769215425642, |
|
"loss": 0.3755, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.0001036474508437579, |
|
"loss": 0.254, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010332744294199972, |
|
"loss": 0.3858, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010300767006038513, |
|
"loss": 0.2092, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010268813380913363, |
|
"loss": 0.2513, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00010236883579727336, |
|
"loss": 0.2815, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010204977763263279, |
|
"loss": 0.2847, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010173096092183255, |
|
"loss": 0.286, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010141238727027761, |
|
"loss": 0.2514, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010109405828214871, |
|
"loss": 0.32, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010077597556039489, |
|
"loss": 0.3244, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010045814070672498, |
|
"loss": 0.2519, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00010014055532159956, |
|
"loss": 0.1773, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.982322100422319e-05, |
|
"loss": 0.2859, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.950613935253612e-05, |
|
"loss": 0.1867, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.918931196320629e-05, |
|
"loss": 0.2626, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.887274043162123e-05, |
|
"loss": 0.3173, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.855642635188028e-05, |
|
"loss": 0.3112, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"eval_loss": 0.2862403690814972, |
|
"eval_runtime": 414.2075, |
|
"eval_samples_per_second": 2.255, |
|
"eval_steps_per_second": 0.282, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.824037131678615e-05, |
|
"loss": 0.3049, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 9.792457691783723e-05, |
|
"loss": 0.256, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.76090447452196e-05, |
|
"loss": 0.2294, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.729377638779857e-05, |
|
"loss": 0.286, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.697877343311144e-05, |
|
"loss": 0.281, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.666403746735868e-05, |
|
"loss": 0.2442, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.634957007539659e-05, |
|
"loss": 0.2457, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.603537284072899e-05, |
|
"loss": 0.2488, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.572144734549919e-05, |
|
"loss": 0.2111, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.540779517048235e-05, |
|
"loss": 0.2211, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.50944178950772e-05, |
|
"loss": 0.3047, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.47813170972983e-05, |
|
"loss": 0.2119, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.446849435376795e-05, |
|
"loss": 0.3416, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.415595123970821e-05, |
|
"loss": 0.2621, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.38436893289333e-05, |
|
"loss": 0.3434, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 9.353171019384119e-05, |
|
"loss": 0.2789, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.322001540540614e-05, |
|
"loss": 0.2642, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.290860653317035e-05, |
|
"loss": 0.2013, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.259748514523653e-05, |
|
"loss": 0.2641, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.228665280825961e-05, |
|
"loss": 0.3053, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.197611108743899e-05, |
|
"loss": 0.2645, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.166586154651082e-05, |
|
"loss": 0.2962, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.135590574773973e-05, |
|
"loss": 0.2297, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.104624525191145e-05, |
|
"loss": 0.2757, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.073688161832457e-05, |
|
"loss": 0.2522, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.042781640478291e-05, |
|
"loss": 0.1648, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 9.011905116758752e-05, |
|
"loss": 0.2364, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.981058746152887e-05, |
|
"loss": 0.3065, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.950242683987925e-05, |
|
"loss": 0.2621, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.919457085438458e-05, |
|
"loss": 0.2498, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 8.888702105525695e-05, |
|
"loss": 0.2306, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.857977899116645e-05, |
|
"loss": 0.2246, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.827284620923368e-05, |
|
"loss": 0.228, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.796622425502192e-05, |
|
"loss": 0.1776, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.765991467252908e-05, |
|
"loss": 0.2613, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.735391900418033e-05, |
|
"loss": 0.2944, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.704823879081997e-05, |
|
"loss": 0.2655, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.67428755717039e-05, |
|
"loss": 0.3042, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.643783088449169e-05, |
|
"loss": 0.262, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.613310626523909e-05, |
|
"loss": 0.2258, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.58287032483901e-05, |
|
"loss": 0.2142, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.552462336676914e-05, |
|
"loss": 0.2212, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.522086815157373e-05, |
|
"loss": 0.2267, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.491743913236628e-05, |
|
"loss": 0.2237, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 8.461433783706682e-05, |
|
"loss": 0.2641, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.431156579194507e-05, |
|
"loss": 0.3007, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.400912452161271e-05, |
|
"loss": 0.2414, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.370701554901602e-05, |
|
"loss": 0.2293, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.340524039542769e-05, |
|
"loss": 0.2484, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.310380058043972e-05, |
|
"loss": 0.331, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.280269762195539e-05, |
|
"loss": 0.2931, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.250193303618182e-05, |
|
"loss": 0.293, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.220150833762215e-05, |
|
"loss": 0.1975, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.190142503906798e-05, |
|
"loss": 0.2335, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.160168465159199e-05, |
|
"loss": 0.208, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.130228868453988e-05, |
|
"loss": 0.3428, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.10032386455232e-05, |
|
"loss": 0.2472, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.070453604041155e-05, |
|
"loss": 0.3039, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.04061823733249e-05, |
|
"loss": 0.2255, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 8.010817914662631e-05, |
|
"loss": 0.2257, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.9810527860914e-05, |
|
"loss": 0.3138, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.951323001501418e-05, |
|
"loss": 0.1823, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.92162871059732e-05, |
|
"loss": 0.2379, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.89197006290502e-05, |
|
"loss": 0.2631, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.86234720777094e-05, |
|
"loss": 0.206, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.83276029436127e-05, |
|
"loss": 0.2091, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.803209471661223e-05, |
|
"loss": 0.2355, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.773694888474267e-05, |
|
"loss": 0.1977, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.744216693421402e-05, |
|
"loss": 0.2937, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.714775034940373e-05, |
|
"loss": 0.263, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.685370061284964e-05, |
|
"loss": 0.2564, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.656001920524219e-05, |
|
"loss": 0.2825, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.626670760541701e-05, |
|
"loss": 0.2408, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 7.597376729034797e-05, |
|
"loss": 0.3018, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.568119973513885e-05, |
|
"loss": 0.2988, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.538900641301672e-05, |
|
"loss": 0.1808, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.509718879532394e-05, |
|
"loss": 0.2324, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.480574835151128e-05, |
|
"loss": 0.1946, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.451468654912999e-05, |
|
"loss": 0.2703, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.422400485382483e-05, |
|
"loss": 0.2354, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.393370472932654e-05, |
|
"loss": 0.2754, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.364378763744429e-05, |
|
"loss": 0.3297, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.335425503805872e-05, |
|
"loss": 0.2702, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.306510838911407e-05, |
|
"loss": 0.2039, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.27763491466114e-05, |
|
"loss": 0.2706, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.248797876460085e-05, |
|
"loss": 0.2029, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.219999869517439e-05, |
|
"loss": 0.3196, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.191241038845874e-05, |
|
"loss": 0.1675, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 7.162521529260767e-05, |
|
"loss": 0.1959, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.133841485379512e-05, |
|
"loss": 0.2223, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.105201051620772e-05, |
|
"loss": 0.2514, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.076600372203735e-05, |
|
"loss": 0.2726, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.04803959114743e-05, |
|
"loss": 0.2536, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 7.019518852269953e-05, |
|
"loss": 0.2934, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.99103829918779e-05, |
|
"loss": 0.2486, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.962598075315046e-05, |
|
"loss": 0.2011, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.934198323862784e-05, |
|
"loss": 0.2214, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.905839187838235e-05, |
|
"loss": 0.2271, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.877520810044114e-05, |
|
"loss": 0.293, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 6.849243333077919e-05, |
|
"loss": 0.2479, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"eval_loss": 0.27964773774147034, |
|
"eval_runtime": 411.8246, |
|
"eval_samples_per_second": 2.268, |
|
"eval_steps_per_second": 0.284, |
|
"step": 1000 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 1444, |
|
"num_train_epochs": 1, |
|
"save_steps": 500, |
|
"total_flos": 1.1151496043737252e+18, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|