{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 7.0588235294117645,
  "eval_steps": 500,
  "global_step": 240,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    { "epoch": 0.03, "learning_rate": 0.00019922390686882426, "loss": 1.5465, "step": 1 },
    { "epoch": 0.06, "learning_rate": 0.00019842381085730289, "loss": 1.4494, "step": 2 },
    { "epoch": 0.09, "learning_rate": 0.00019762371484578152, "loss": 1.2437, "step": 3 },
    { "epoch": 0.12, "learning_rate": 0.00019682361883426012, "loss": 1.3019, "step": 4 },
    { "epoch": 0.15, "learning_rate": 0.00019602352282273875, "loss": 1.4664, "step": 5 },
    { "epoch": 0.18, "learning_rate": 0.00019522342681121735, "loss": 1.1706, "step": 6 },
    { "epoch": 0.21, "learning_rate": 0.00019442333079969598, "loss": 1.2089, "step": 7 },
    { "epoch": 0.24, "learning_rate": 0.00019362323478817458, "loss": 1.0329, "step": 8 },
    { "epoch": 0.26, "learning_rate": 0.0001928231387766532, "loss": 1.3136, "step": 9 },
    { "epoch": 0.29, "learning_rate": 0.00019202304276513184, "loss": 1.0903, "step": 10 },
    { "epoch": 0.32, "learning_rate": 0.00019122294675361044, "loss": 1.089, "step": 11 },
    { "epoch": 0.35, "learning_rate": 0.00019042285074208904, "loss": 1.2918, "step": 12 },
    { "epoch": 0.38, "learning_rate": 0.00018962275473056767, "loss": 1.0208, "step": 13 },
    { "epoch": 0.41, "learning_rate": 0.0001888226587190463, "loss": 1.2414, "step": 14 },
    { "epoch": 0.44, "learning_rate": 0.00018802256270752493, "loss": 0.8309, "step": 15 },
    { "epoch": 0.47, "learning_rate": 0.00018722246669600353, "loss": 1.0701, "step": 16 },
    { "epoch": 0.5, "learning_rate": 0.00018642237068448213, "loss": 1.103, "step": 17 },
    { "epoch": 0.53, "learning_rate": 0.00018562227467296076, "loss": 1.1536, "step": 18 },
    { "epoch": 0.56, "learning_rate": 0.0001848221786614394, "loss": 0.9786, "step": 19 },
    { "epoch": 0.59, "learning_rate": 0.000184022082649918, "loss": 1.0474, "step": 20 },
    { "epoch": 0.62, "learning_rate": 0.00018322198663839662, "loss": 0.9066, "step": 21 },
    { "epoch": 0.65, "learning_rate": 0.00018242189062687522, "loss": 1.0756, "step": 22 },
    { "epoch": 0.68, "learning_rate": 0.00018162179461535385, "loss": 0.9649, "step": 23 },
    { "epoch": 0.71, "learning_rate": 0.00018082169860383248, "loss": 1.0698, "step": 24 },
    { "epoch": 0.74, "learning_rate": 0.00018002160259231108, "loss": 1.0971, "step": 25 },
    { "epoch": 0.76, "learning_rate": 0.0001792215065807897, "loss": 1.0154, "step": 26 },
    { "epoch": 0.79, "learning_rate": 0.00017842141056926834, "loss": 1.2113, "step": 27 },
    { "epoch": 0.82, "learning_rate": 0.00017762131455774694, "loss": 1.0433, "step": 28 },
    { "epoch": 0.85, "learning_rate": 0.00017682121854622554, "loss": 0.9149, "step": 29 },
    { "epoch": 0.88, "learning_rate": 0.00017602112253470417, "loss": 1.294, "step": 30 },
    { "epoch": 0.91, "learning_rate": 0.0001752210265231828, "loss": 0.8756, "step": 31 },
    { "epoch": 0.94, "learning_rate": 0.00017442093051166143, "loss": 0.9514, "step": 32 },
    { "epoch": 0.97, "learning_rate": 0.00017362083450014, "loss": 0.7657, "step": 33 },
    { "epoch": 1.0, "learning_rate": 0.00017282073848861863, "loss": 0.9848, "step": 34 },
    { "epoch": 1.03, "learning_rate": 0.00017202064247709726, "loss": 0.9737, "step": 35 },
    { "epoch": 1.06, "learning_rate": 0.0001712205464655759, "loss": 0.6804, "step": 36 },
    { "epoch": 1.09, "learning_rate": 0.0001704204504540545, "loss": 1.0733, "step": 37 },
    { "epoch": 1.12, "learning_rate": 0.0001696203544425331, "loss": 0.984, "step": 38 },
    { "epoch": 1.15, "learning_rate": 0.00016882025843101172, "loss": 0.7753, "step": 39 },
    { "epoch": 1.18, "learning_rate": 0.00016802016241949035, "loss": 0.9925, "step": 40 },
    { "epoch": 1.21, "learning_rate": 0.00016722006640796896, "loss": 0.4777, "step": 41 },
    { "epoch": 1.24, "learning_rate": 0.00016641997039644758, "loss": 0.8519, "step": 42 },
    { "epoch": 1.26, "learning_rate": 0.00016561987438492621, "loss": 0.8004, "step": 43 },
    { "epoch": 1.29, "learning_rate": 0.00016481977837340482, "loss": 0.8533, "step": 44 },
    { "epoch": 1.32, "learning_rate": 0.00016401968236188344, "loss": 0.5997, "step": 45 },
    { "epoch": 1.35, "learning_rate": 0.00016321958635036205, "loss": 0.7882, "step": 46 },
    { "epoch": 1.38, "learning_rate": 0.00016241949033884068, "loss": 1.0804, "step": 47 },
    { "epoch": 1.41, "learning_rate": 0.0001616193943273193, "loss": 0.4132, "step": 48 },
    { "epoch": 1.44, "learning_rate": 0.0001608192983157979, "loss": 0.9914, "step": 49 },
    { "epoch": 1.47, "learning_rate": 0.0001600192023042765, "loss": 0.3964, "step": 50 },
    { "epoch": 1.5, "learning_rate": 0.00015921910629275514, "loss": 0.5703, "step": 51 },
    { "epoch": 1.53, "learning_rate": 0.00015841901028123377, "loss": 0.7238, "step": 52 },
    { "epoch": 1.56, "learning_rate": 0.00015761891426971237, "loss": 0.6712, "step": 53 },
    { "epoch": 1.59, "learning_rate": 0.00015681881825819097, "loss": 0.6738, "step": 54 },
    { "epoch": 1.62, "learning_rate": 0.0001560187222466696, "loss": 0.6217, "step": 55 },
    { "epoch": 1.65, "learning_rate": 0.00015521862623514823, "loss": 0.5953, "step": 56 },
    { "epoch": 1.68, "learning_rate": 0.00015441853022362686, "loss": 0.8303, "step": 57 },
    { "epoch": 1.71, "learning_rate": 0.00015361843421210546, "loss": 0.6253, "step": 58 },
    { "epoch": 1.74, "learning_rate": 0.0001528183382005841, "loss": 0.3754, "step": 59 },
    { "epoch": 1.76, "learning_rate": 0.0001520182421890627, "loss": 0.6664, "step": 60 },
    { "epoch": 1.79, "learning_rate": 0.00015121814617754132, "loss": 0.6756, "step": 61 },
    { "epoch": 1.82, "learning_rate": 0.00015041805016601992, "loss": 0.8601, "step": 62 },
    { "epoch": 1.85, "learning_rate": 0.00014961795415449855, "loss": 0.6088, "step": 63 },
    { "epoch": 1.88, "learning_rate": 0.00014881785814297718, "loss": 0.5837, "step": 64 },
    { "epoch": 1.91, "learning_rate": 0.00014801776213145578, "loss": 0.5813, "step": 65 },
    { "epoch": 1.94, "learning_rate": 0.00014721766611993438, "loss": 0.672, "step": 66 },
    { "epoch": 1.97, "learning_rate": 0.000146417570108413, "loss": 0.9818, "step": 67 },
    { "epoch": 2.0, "learning_rate": 0.00014561747409689164, "loss": 0.7044, "step": 68 },
    { "epoch": 2.03, "learning_rate": 0.00014481737808537027, "loss": 0.4525, "step": 69 },
    { "epoch": 2.06, "learning_rate": 0.00014401728207384887, "loss": 0.3649, "step": 70 },
    { "epoch": 2.09, "learning_rate": 0.00014321718606232747, "loss": 0.4089, "step": 71 },
    { "epoch": 2.12, "learning_rate": 0.0001424170900508061, "loss": 0.4102, "step": 72 },
    { "epoch": 2.15, "learning_rate": 0.00014161699403928473, "loss": 0.2403, "step": 73 },
    { "epoch": 2.18, "learning_rate": 0.00014081689802776333, "loss": 0.3113, "step": 74 },
    { "epoch": 2.21, "learning_rate": 0.00014001680201624196, "loss": 0.5503, "step": 75 },
    { "epoch": 2.24, "learning_rate": 0.00013921670600472056, "loss": 0.6021, "step": 76 },
    { "epoch": 2.26, "learning_rate": 0.0001384166099931992, "loss": 0.5271, "step": 77 },
    { "epoch": 2.29, "learning_rate": 0.00013761651398167782, "loss": 0.3207, "step": 78 },
    { "epoch": 2.32, "learning_rate": 0.00013681641797015642, "loss": 0.4899, "step": 79 },
    { "epoch": 2.35, "learning_rate": 0.00013601632195863505, "loss": 0.3858, "step": 80 },
    { "epoch": 2.38, "learning_rate": 0.00013521622594711365, "loss": 0.4361, "step": 81 },
    { "epoch": 2.41, "learning_rate": 0.00013441612993559228, "loss": 0.3672, "step": 82 },
    { "epoch": 2.44, "learning_rate": 0.00013361603392407088, "loss": 0.5378, "step": 83 },
    { "epoch": 2.47, "learning_rate": 0.00013281593791254951, "loss": 0.3577, "step": 84 },
    { "epoch": 2.5, "learning_rate": 0.00013201584190102814, "loss": 0.2658, "step": 85 },
    { "epoch": 2.53, "learning_rate": 0.00013121574588950674, "loss": 0.2015, "step": 86 },
    { "epoch": 2.56, "learning_rate": 0.00013041564987798535, "loss": 0.4888, "step": 87 },
    { "epoch": 2.59, "learning_rate": 0.00012961555386646398, "loss": 0.3641, "step": 88 },
    { "epoch": 2.62, "learning_rate": 0.0001288154578549426, "loss": 0.3613, "step": 89 },
    { "epoch": 2.65, "learning_rate": 0.00012801536184342123, "loss": 0.3576, "step": 90 },
    { "epoch": 2.68, "learning_rate": 0.00012721526583189984, "loss": 0.1833, "step": 91 },
    { "epoch": 2.71, "learning_rate": 0.00012641516982037844, "loss": 0.1528, "step": 92 },
    { "epoch": 2.74, "learning_rate": 0.00012561507380885707, "loss": 0.5194, "step": 93 },
    { "epoch": 2.76, "learning_rate": 0.0001248149777973357, "loss": 0.543, "step": 94 },
    { "epoch": 2.79, "learning_rate": 0.0001240148817858143, "loss": 0.2746, "step": 95 },
    { "epoch": 2.82, "learning_rate": 0.00012321478577429293, "loss": 0.1863, "step": 96 },
    { "epoch": 2.85, "learning_rate": 0.00012241468976277153, "loss": 0.4064, "step": 97 },
    { "epoch": 2.88, "learning_rate": 0.00012161459375125014, "loss": 0.4429, "step": 98 },
    { "epoch": 2.91, "learning_rate": 0.00012081449773972877, "loss": 0.32, "step": 99 },
    { "epoch": 2.94, "learning_rate": 0.00012001440172820739, "loss": 0.266, "step": 100 },
    { "epoch": 2.97, "learning_rate": 0.00011921430571668602, "loss": 0.4243, "step": 101 },
    { "epoch": 3.0, "learning_rate": 0.00011841420970516463, "loss": 0.381, "step": 102 },
    { "epoch": 3.03, "learning_rate": 0.00011761411369364323, "loss": 0.1964, "step": 103 },
    { "epoch": 3.06, "learning_rate": 0.00011681401768212186, "loss": 0.2786, "step": 104 },
    { "epoch": 3.09, "learning_rate": 0.00011601392167060048, "loss": 0.2606, "step": 105 },
    { "epoch": 3.12, "learning_rate": 0.0001152138256590791, "loss": 0.1124, "step": 106 },
    { "epoch": 3.15, "learning_rate": 0.00011441372964755772, "loss": 0.242, "step": 107 },
    { "epoch": 3.18, "learning_rate": 0.00011361363363603632, "loss": 0.1387, "step": 108 },
    { "epoch": 3.21, "learning_rate": 0.00011281353762451494, "loss": 0.1772, "step": 109 },
    { "epoch": 3.24, "learning_rate": 0.00011201344161299357, "loss": 0.1568, "step": 110 },
    { "epoch": 3.26, "learning_rate": 0.00011121334560147218, "loss": 0.2883, "step": 111 },
    { "epoch": 3.29, "learning_rate": 0.00011041324958995081, "loss": 0.2013, "step": 112 },
    { "epoch": 3.32, "learning_rate": 0.00010961315357842943, "loss": 0.2457, "step": 113 },
    { "epoch": 3.35, "learning_rate": 0.00010881305756690803, "loss": 0.1284, "step": 114 },
    { "epoch": 3.38, "learning_rate": 0.00010801296155538665, "loss": 0.1901, "step": 115 },
    { "epoch": 3.41, "learning_rate": 0.00010721286554386528, "loss": 0.1334, "step": 116 },
    { "epoch": 3.44, "learning_rate": 0.00010641276953234389, "loss": 0.1461, "step": 117 },
    { "epoch": 3.47, "learning_rate": 0.00010561267352082252, "loss": 0.2129, "step": 118 },
    { "epoch": 3.5, "learning_rate": 0.00010481257750930111, "loss": 0.0939, "step": 119 },
    { "epoch": 3.53, "learning_rate": 0.00010401248149777974, "loss": 0.1209, "step": 120 },
    { "epoch": 3.56, "learning_rate": 0.00010321238548625835, "loss": 0.1331, "step": 121 },
    { "epoch": 3.59, "learning_rate": 0.00010241228947473698, "loss": 0.0941, "step": 122 },
    { "epoch": 3.62, "learning_rate": 0.0001016121934632156, "loss": 0.2107, "step": 123 },
    { "epoch": 3.65, "learning_rate": 0.0001008120974516942, "loss": 0.2211, "step": 124 },
    { "epoch": 3.68, "learning_rate": 0.00010001200144017281, "loss": 0.145, "step": 125 },
    { "epoch": 3.71, "learning_rate": 9.921190542865144e-05, "loss": 0.1024, "step": 126 },
    { "epoch": 3.74, "learning_rate": 9.841180941713006e-05, "loss": 0.0663, "step": 127 },
    { "epoch": 3.76, "learning_rate": 9.761171340560867e-05, "loss": 0.1352, "step": 128 },
    { "epoch": 3.79, "learning_rate": 9.681161739408729e-05, "loss": 0.2258, "step": 129 },
    { "epoch": 3.82, "learning_rate": 9.601152138256592e-05, "loss": 0.1926, "step": 130 },
    { "epoch": 3.85, "learning_rate": 9.521142537104452e-05, "loss": 0.1149, "step": 131 },
    { "epoch": 3.88, "learning_rate": 9.441132935952315e-05, "loss": 0.1301, "step": 132 },
    { "epoch": 3.91, "learning_rate": 9.361123334800176e-05, "loss": 0.0552, "step": 133 },
    { "epoch": 3.94, "learning_rate": 9.281113733648038e-05, "loss": 0.1837, "step": 134 },
    { "epoch": 3.97, "learning_rate": 9.2011041324959e-05, "loss": 0.1085, "step": 135 },
    { "epoch": 4.0, "learning_rate": 9.121094531343761e-05, "loss": 0.2303, "step": 136 },
    { "epoch": 4.03, "learning_rate": 9.041084930191624e-05, "loss": 0.0498, "step": 137 },
    { "epoch": 4.06, "learning_rate": 8.961075329039486e-05, "loss": 0.0452, "step": 138 },
    { "epoch": 4.09, "learning_rate": 8.881065727887347e-05, "loss": 0.1161, "step": 139 },
    { "epoch": 4.12, "learning_rate": 8.801056126735209e-05, "loss": 0.0441, "step": 140 },
    { "epoch": 4.15, "learning_rate": 8.721046525583072e-05, "loss": 0.0704, "step": 141 },
    { "epoch": 4.18, "learning_rate": 8.641036924430932e-05, "loss": 0.1103, "step": 142 },
    { "epoch": 4.21, "learning_rate": 8.561027323278795e-05, "loss": 0.0313, "step": 143 },
    { "epoch": 4.24, "learning_rate": 8.481017722126655e-05, "loss": 0.0494, "step": 144 },
    { "epoch": 4.26, "learning_rate": 8.401008120974518e-05, "loss": 0.0805, "step": 145 },
    { "epoch": 4.29, "learning_rate": 8.320998519822379e-05, "loss": 0.0512, "step": 146 },
    { "epoch": 4.32, "learning_rate": 8.240988918670241e-05, "loss": 0.0442, "step": 147 },
    { "epoch": 4.35, "learning_rate": 8.160979317518102e-05, "loss": 0.0944, "step": 148 },
    { "epoch": 4.38, "learning_rate": 8.080969716365965e-05, "loss": 0.0621, "step": 149 },
    { "epoch": 4.41, "learning_rate": 8.000960115213825e-05, "loss": 0.1191, "step": 150 },
    { "epoch": 4.44, "learning_rate": 7.920950514061688e-05, "loss": 0.1518, "step": 151 },
    { "epoch": 4.47, "learning_rate": 7.840940912909548e-05, "loss": 0.1002, "step": 152 },
    { "epoch": 4.5, "learning_rate": 7.760931311757411e-05, "loss": 0.0803, "step": 153 },
    { "epoch": 4.53, "learning_rate": 7.680921710605273e-05, "loss": 0.0582, "step": 154 },
    { "epoch": 4.56, "learning_rate": 7.600912109453134e-05, "loss": 0.0745, "step": 155 },
    { "epoch": 4.59, "learning_rate": 7.520902508300996e-05, "loss": 0.0844, "step": 156 },
    { "epoch": 4.62, "learning_rate": 7.440892907148859e-05, "loss": 0.056, "step": 157 },
    { "epoch": 4.65, "learning_rate": 7.360883305996719e-05, "loss": 0.1091, "step": 158 },
    { "epoch": 4.68, "learning_rate": 7.280873704844582e-05, "loss": 0.0561, "step": 159 },
    { "epoch": 4.71, "learning_rate": 7.200864103692444e-05, "loss": 0.0815, "step": 160 },
    { "epoch": 4.74, "learning_rate": 7.120854502540305e-05, "loss": 0.0704, "step": 161 },
    { "epoch": 4.76, "learning_rate": 7.040844901388167e-05, "loss": 0.0954, "step": 162 },
    { "epoch": 4.79, "learning_rate": 6.960835300236028e-05, "loss": 0.0915, "step": 163 },
    { "epoch": 4.82, "learning_rate": 6.880825699083891e-05, "loss": 0.0539, "step": 164 },
    { "epoch": 4.85, "learning_rate": 6.800816097931753e-05, "loss": 0.0694, "step": 165 },
    { "epoch": 4.88, "learning_rate": 6.720806496779614e-05, "loss": 0.0763, "step": 166 },
    { "epoch": 4.91, "learning_rate": 6.640796895627476e-05, "loss": 0.0388, "step": 167 },
    { "epoch": 4.94, "learning_rate": 6.560787294475337e-05, "loss": 0.1189, "step": 168 },
    { "epoch": 4.97, "learning_rate": 6.480777693323199e-05, "loss": 0.0441, "step": 169 },
    { "epoch": 5.0, "learning_rate": 6.400768092171062e-05, "loss": 0.0801, "step": 170 },
    { "epoch": 5.03, "learning_rate": 6.320758491018922e-05, "loss": 0.027, "step": 171 },
    { "epoch": 5.06, "learning_rate": 6.240748889866785e-05, "loss": 0.0346, "step": 172 },
    { "epoch": 5.09, "learning_rate": 6.160739288714646e-05, "loss": 0.0385, "step": 173 },
    { "epoch": 5.12, "learning_rate": 6.080729687562507e-05, "loss": 0.0575, "step": 174 },
    { "epoch": 5.15, "learning_rate": 6.0007200864103694e-05, "loss": 0.042, "step": 175 },
    { "epoch": 5.18, "learning_rate": 5.9207104852582316e-05, "loss": 0.0521, "step": 176 },
    { "epoch": 5.21, "learning_rate": 5.840700884106093e-05, "loss": 0.054, "step": 177 },
    { "epoch": 5.24, "learning_rate": 5.760691282953955e-05, "loss": 0.0503, "step": 178 },
    { "epoch": 5.26, "learning_rate": 5.680681681801816e-05, "loss": 0.0285, "step": 179 },
    { "epoch": 5.29, "learning_rate": 5.6006720806496785e-05, "loss": 0.0281, "step": 180 },
    { "epoch": 5.32, "learning_rate": 5.520662479497541e-05, "loss": 0.0402, "step": 181 },
    { "epoch": 5.35, "learning_rate": 5.4406528783454015e-05, "loss": 0.04, "step": 182 },
    { "epoch": 5.38, "learning_rate": 5.360643277193264e-05, "loss": 0.044, "step": 183 },
    { "epoch": 5.41, "learning_rate": 5.280633676041126e-05, "loss": 0.0264, "step": 184 },
    { "epoch": 5.44, "learning_rate": 5.200624074888987e-05, "loss": 0.0379, "step": 185 },
    { "epoch": 5.47, "learning_rate": 5.120614473736849e-05, "loss": 0.037, "step": 186 },
    { "epoch": 5.5, "learning_rate": 5.04060487258471e-05, "loss": 0.0422, "step": 187 },
    { "epoch": 5.53, "learning_rate": 4.960595271432572e-05, "loss": 0.0313, "step": 188 },
    { "epoch": 5.56, "learning_rate": 4.880585670280434e-05, "loss": 0.0241, "step": 189 },
    { "epoch": 5.59, "learning_rate": 4.800576069128296e-05, "loss": 0.0546, "step": 190 },
    { "epoch": 5.62, "learning_rate": 4.7205664679761575e-05, "loss": 0.0305, "step": 191 },
    { "epoch": 5.65, "learning_rate": 4.640556866824019e-05, "loss": 0.0364, "step": 192 },
    { "epoch": 5.68, "learning_rate": 4.5605472656718805e-05, "loss": 0.0329, "step": 193 },
    { "epoch": 5.71, "learning_rate": 4.480537664519743e-05, "loss": 0.0453, "step": 194 },
    { "epoch": 5.74, "learning_rate": 4.400528063367604e-05, "loss": 0.0386, "step": 195 },
    { "epoch": 5.76, "learning_rate": 4.320518462215466e-05, "loss": 0.0401, "step": 196 },
    { "epoch": 5.79, "learning_rate": 4.2405088610633274e-05, "loss": 0.0527, "step": 197 },
    { "epoch": 5.82, "learning_rate": 4.1604992599111896e-05, "loss": 0.0342, "step": 198 },
    { "epoch": 5.85, "learning_rate": 4.080489658759051e-05, "loss": 0.0336, "step": 199 },
    { "epoch": 5.88, "learning_rate": 4.000480057606913e-05, "loss": 0.034, "step": 200 },
    { "epoch": 5.91, "learning_rate": 3.920470456454774e-05, "loss": 0.0316, "step": 201 },
    { "epoch": 5.94, "learning_rate": 3.8404608553026365e-05, "loss": 0.0537, "step": 202 },
    { "epoch": 5.97, "learning_rate": 3.760451254150498e-05, "loss": 0.0348, "step": 203 },
    { "epoch": 6.0, "learning_rate": 3.6804416529983595e-05, "loss": 0.0525, "step": 204 },
    { "epoch": 6.03, "learning_rate": 3.600432051846222e-05, "loss": 0.0264, "step": 205 },
    { "epoch": 6.06, "learning_rate": 3.520422450694083e-05, "loss": 0.0216, "step": 206 },
    { "epoch": 6.09, "learning_rate": 3.4404128495419455e-05, "loss": 0.0232, "step": 207 },
    { "epoch": 6.12, "learning_rate": 3.360403248389807e-05, "loss": 0.0342, "step": 208 },
    { "epoch": 6.15, "learning_rate": 3.2803936472376686e-05, "loss": 0.0196, "step": 209 },
    { "epoch": 6.18, "learning_rate": 3.200384046085531e-05, "loss": 0.0292, "step": 210 },
    { "epoch": 6.21, "learning_rate": 3.1203744449333924e-05, "loss": 0.02, "step": 211 },
    { "epoch": 6.24, "learning_rate": 3.0403648437812536e-05, "loss": 0.0243, "step": 212 },
    { "epoch": 6.26, "learning_rate": 2.9603552426291158e-05, "loss": 0.0223, "step": 213 },
    { "epoch": 6.29, "learning_rate": 2.8803456414769773e-05, "loss": 0.017, "step": 214 },
    { "epoch": 6.32, "learning_rate": 2.8003360403248392e-05, "loss": 0.0201, "step": 215 },
    { "epoch": 6.35, "learning_rate": 2.7203264391727008e-05, "loss": 0.0368, "step": 216 },
    { "epoch": 6.38, "learning_rate": 2.640316838020563e-05, "loss": 0.0228, "step": 217 },
    { "epoch": 6.41, "learning_rate": 2.5603072368684245e-05, "loss": 0.0241, "step": 218 },
    { "epoch": 6.44, "learning_rate": 2.480297635716286e-05, "loss": 0.025, "step": 219 },
    { "epoch": 6.47, "learning_rate": 2.400288034564148e-05, "loss": 0.0261, "step": 220 },
    { "epoch": 6.5, "learning_rate": 2.3202784334120095e-05, "loss": 0.026, "step": 221 },
    { "epoch": 6.53, "learning_rate": 2.2402688322598714e-05, "loss": 0.0242, "step": 222 },
    { "epoch": 6.56, "learning_rate": 2.160259231107733e-05, "loss": 0.029, "step": 223 },
    { "epoch": 6.59, "learning_rate": 2.0802496299555948e-05, "loss": 0.0305, "step": 224 },
    { "epoch": 6.62, "learning_rate": 2.0002400288034563e-05, "loss": 0.0263, "step": 225 },
    { "epoch": 6.65, "learning_rate": 1.9202304276513182e-05, "loss": 0.021, "step": 226 },
    { "epoch": 6.68, "learning_rate": 1.8402208264991798e-05, "loss": 0.0295, "step": 227 },
    { "epoch": 6.71, "learning_rate": 1.7602112253470417e-05, "loss": 0.0276, "step": 228 },
    { "epoch": 6.74, "learning_rate": 1.6802016241949035e-05, "loss": 0.023, "step": 229 },
    { "epoch": 6.76, "learning_rate": 1.6001920230427654e-05, "loss": 0.0251, "step": 230 },
    { "epoch": 6.79, "learning_rate": 1.5201824218906268e-05, "loss": 0.0415, "step": 231 },
    { "epoch": 6.82, "learning_rate": 1.4401728207384887e-05, "loss": 0.0292, "step": 232 },
    { "epoch": 6.85, "learning_rate": 1.3601632195863504e-05, "loss": 0.0268, "step": 233 },
    { "epoch": 6.88, "learning_rate": 1.2801536184342123e-05, "loss": 0.0265, "step": 234 },
    { "epoch": 6.91, "learning_rate": 1.200144017282074e-05, "loss": 0.0208, "step": 235 },
    { "epoch": 6.94, "learning_rate": 1.1201344161299357e-05, "loss": 0.0231, "step": 236 },
    { "epoch": 6.97, "learning_rate": 1.0401248149777974e-05, "loss": 0.022, "step": 237 },
    { "epoch": 7.0, "learning_rate": 9.601152138256591e-06, "loss": 0.0242, "step": 238 },
    { "epoch": 7.03, "learning_rate": 8.801056126735208e-06, "loss": 0.0155, "step": 239 },
    { "epoch": 7.06, "learning_rate": 8.000960115213827e-06, "loss": 0.0183, "step": 240 }
  ],
  "logging_steps": 1,
  "max_steps": 250,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 8,
  "save_steps": 20,
  "total_flos": 1.5323280428875776e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}