|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 3.0, |
|
"eval_steps": 200, |
|
"global_step": 1215, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 2e-05, |
|
"loss": 2.8335, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 4e-05, |
|
"loss": 2.752, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 6e-05, |
|
"loss": 2.7803, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 8e-05, |
|
"loss": 2.5798, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.0001, |
|
"loss": 2.7038, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.01, |
|
"learning_rate": 0.00012, |
|
"loss": 2.1478, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00014, |
|
"loss": 1.7468, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00016, |
|
"loss": 1.6426, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.00018, |
|
"loss": 0.9787, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.02, |
|
"learning_rate": 0.0002, |
|
"loss": 0.7736, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999997902092877, |
|
"loss": 0.5849, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999991608372393, |
|
"loss": 0.4335, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999981118841182, |
|
"loss": 0.3929, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 0.00019999966433503652, |
|
"loss": 0.1443, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019999947552365961, |
|
"loss": 0.1243, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.0001999992447543603, |
|
"loss": 0.4306, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019999897202723545, |
|
"loss": 0.2558, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.04, |
|
"learning_rate": 0.00019999865734239946, |
|
"loss": 0.1647, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001999983006999844, |
|
"loss": 0.191, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019999790210013988, |
|
"loss": 0.259, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.00019999746154303317, |
|
"loss": 0.0401, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.05, |
|
"learning_rate": 0.0001999969790288491, |
|
"loss": 0.0613, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019999645455779014, |
|
"loss": 0.2135, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019999588813007633, |
|
"loss": 0.0816, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.0001999952797459453, |
|
"loss": 0.0543, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.06, |
|
"learning_rate": 0.00019999462940565243, |
|
"loss": 0.1945, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001999939371094705, |
|
"loss": 0.0461, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.00019999320285769, |
|
"loss": 0.1266, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.000199992426650619, |
|
"loss": 0.0274, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 0.0001999916084885832, |
|
"loss": 0.1039, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019999074837192589, |
|
"loss": 0.0169, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019998984630100792, |
|
"loss": 0.1045, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.00019998890227620783, |
|
"loss": 0.0236, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.08, |
|
"learning_rate": 0.0001999879162979217, |
|
"loss": 0.0125, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019998688836656323, |
|
"loss": 0.0317, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.0001999858184825637, |
|
"loss": 0.0216, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019998470664637203, |
|
"loss": 0.0583, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.09, |
|
"learning_rate": 0.00019998355285845475, |
|
"loss": 0.0534, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001999823571192959, |
|
"loss": 0.1213, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.0001999811194293973, |
|
"loss": 0.2095, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019997983978927813, |
|
"loss": 0.0122, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 0.00019997851819947537, |
|
"loss": 0.2262, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019997715466054357, |
|
"loss": 0.0329, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019997574917305478, |
|
"loss": 0.0907, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019997430173759875, |
|
"loss": 0.0111, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.11, |
|
"learning_rate": 0.00019997281235478278, |
|
"loss": 0.1297, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019997128102523183, |
|
"loss": 0.0805, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019996970774958836, |
|
"loss": 0.0527, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019996809252851251, |
|
"loss": 0.0117, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.12, |
|
"learning_rate": 0.00019996643536268204, |
|
"loss": 0.0287, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001999647362527922, |
|
"loss": 0.0106, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.0001999629951995559, |
|
"loss": 0.0564, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019996121220370374, |
|
"loss": 0.0318, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 0.00019995938726598373, |
|
"loss": 0.0829, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019995752038716168, |
|
"loss": 0.0564, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019995561156802079, |
|
"loss": 0.0852, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.00019995366080936204, |
|
"loss": 0.0146, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.14, |
|
"learning_rate": 0.0001999516681120039, |
|
"loss": 0.0446, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019994963347678247, |
|
"loss": 0.0103, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019994755690455152, |
|
"loss": 0.0147, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.00019994543839618221, |
|
"loss": 0.0479, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.15, |
|
"learning_rate": 0.0001999432779525635, |
|
"loss": 0.0028, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001999410755746019, |
|
"loss": 0.0316, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001999388312632214, |
|
"loss": 0.113, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.0001999365450193638, |
|
"loss": 0.0148, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.16, |
|
"learning_rate": 0.00019993421684398824, |
|
"loss": 0.0437, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019993184673807162, |
|
"loss": 0.0287, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019992943470260844, |
|
"loss": 0.0088, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019992698073861064, |
|
"loss": 0.0971, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 0.00019992448484710797, |
|
"loss": 0.0557, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001999219470291476, |
|
"loss": 0.0609, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019991936728579437, |
|
"loss": 0.0033, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.0001999167456181307, |
|
"loss": 0.0853, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.18, |
|
"learning_rate": 0.00019991408202725655, |
|
"loss": 0.0019, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019991137651428957, |
|
"loss": 0.0121, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.0001999086290803649, |
|
"loss": 0.016, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019990583972663534, |
|
"loss": 0.0014, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.19, |
|
"learning_rate": 0.00019990300845427125, |
|
"loss": 0.1069, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019990013526446056, |
|
"loss": 0.0157, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001998972201584088, |
|
"loss": 0.0891, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.0001998942631373391, |
|
"loss": 0.0495, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019989126420249221, |
|
"loss": 0.0441, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 0.00019988822335512637, |
|
"loss": 0.079, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019988514059651752, |
|
"loss": 0.0699, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001998820159279591, |
|
"loss": 0.0009, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.00019987884935076213, |
|
"loss": 0.0007, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"learning_rate": 0.0001998756408662553, |
|
"loss": 0.061, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019987239047578482, |
|
"loss": 0.0012, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019986909818071446, |
|
"loss": 0.0657, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019986576398242566, |
|
"loss": 0.0024, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.22, |
|
"learning_rate": 0.00019986238788231733, |
|
"loss": 0.0411, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019985896988180605, |
|
"loss": 0.0813, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019985550998232596, |
|
"loss": 0.0013, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.00019985200818532875, |
|
"loss": 0.1736, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 0.0001998484644922837, |
|
"loss": 0.0006, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001998448789046777, |
|
"loss": 0.0013, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.0001998412514240152, |
|
"loss": 0.0813, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019983758205181822, |
|
"loss": 0.0002, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.24, |
|
"learning_rate": 0.00019983387078962631, |
|
"loss": 0.0006, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019983011763899673, |
|
"loss": 0.1275, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019982632260150417, |
|
"loss": 0.0737, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019982248567874098, |
|
"loss": 0.162, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.25, |
|
"learning_rate": 0.00019981860687231706, |
|
"loss": 0.2331, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019981468618385988, |
|
"loss": 0.0005, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.0001998107236150145, |
|
"loss": 0.001, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019980671916744352, |
|
"loss": 0.0136, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.26, |
|
"learning_rate": 0.00019980267284282717, |
|
"loss": 0.1149, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019979858464286317, |
|
"loss": 0.1266, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019979445456926688, |
|
"loss": 0.0724, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019979028262377118, |
|
"loss": 0.1143, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 0.00019978606880812657, |
|
"loss": 0.0084, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019978181312410104, |
|
"loss": 0.0657, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019977751557348025, |
|
"loss": 0.1512, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.00019977317615806737, |
|
"loss": 0.0051, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.28, |
|
"learning_rate": 0.0001997687948796831, |
|
"loss": 0.0093, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019976437174016573, |
|
"loss": 0.0067, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.0001997599067413712, |
|
"loss": 0.0949, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019975539988517288, |
|
"loss": 0.0393, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.29, |
|
"learning_rate": 0.00019975085117346177, |
|
"loss": 0.0364, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019974626060814647, |
|
"loss": 0.1306, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019974162819115306, |
|
"loss": 0.0019, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.0001997369539244252, |
|
"loss": 0.0029, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 0.00019973223780992414, |
|
"loss": 0.0159, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001997274798496287, |
|
"loss": 0.0019, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.0001997226800455352, |
|
"loss": 0.0194, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019971783839965756, |
|
"loss": 0.0037, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.31, |
|
"learning_rate": 0.00019971295491402725, |
|
"loss": 0.0442, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019970802959069328, |
|
"loss": 0.003, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019970306243172222, |
|
"loss": 0.0023, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019969805343919821, |
|
"loss": 0.0198, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.32, |
|
"learning_rate": 0.00019969300261522293, |
|
"loss": 0.0025, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.0001996879099619156, |
|
"loss": 0.0574, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019968277548141302, |
|
"loss": 0.0796, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019967759917586953, |
|
"loss": 0.0326, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 0.00019967238104745696, |
|
"loss": 0.0021, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019966712109836476, |
|
"loss": 0.0008, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019966181933079997, |
|
"loss": 0.0943, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.000199656475746987, |
|
"loss": 0.002, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.34, |
|
"learning_rate": 0.00019965109034916808, |
|
"loss": 0.0451, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.00019964566313960264, |
|
"loss": 0.0485, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.000199640194120568, |
|
"loss": 0.0186, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001996346832943587, |
|
"loss": 0.0305, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.35, |
|
"learning_rate": 0.0001996291306632871, |
|
"loss": 0.0625, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019962353622968295, |
|
"loss": 0.0183, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019961789999589356, |
|
"loss": 0.0043, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019961222196428378, |
|
"loss": 0.0493, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.36, |
|
"learning_rate": 0.00019960650213723602, |
|
"loss": 0.1198, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.0001996007405171502, |
|
"loss": 0.0005, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019959493710644384, |
|
"loss": 0.0006, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019958909190755187, |
|
"loss": 0.1201, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 0.00019958320492292686, |
|
"loss": 0.0012, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019957727615503888, |
|
"loss": 0.0025, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019957130560637552, |
|
"loss": 0.1595, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.00019956529327944198, |
|
"loss": 0.0003, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.38, |
|
"learning_rate": 0.0001995592391767608, |
|
"loss": 0.0014, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019955314330087225, |
|
"loss": 0.1316, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019954700565433405, |
|
"loss": 0.0015, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.00019954082623972142, |
|
"loss": 0.0015, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.39, |
|
"learning_rate": 0.0001995346050596271, |
|
"loss": 0.0018, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001995283421166614, |
|
"loss": 0.0638, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019952203741345218, |
|
"loss": 0.0283, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.00019951569095264473, |
|
"loss": 0.0005, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001995093027369019, |
|
"loss": 0.0013, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4, |
|
"learning_rate": 0.0001995028727689041, |
|
"loss": 0.0828, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019949640105134918, |
|
"loss": 0.0007, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019948988758695263, |
|
"loss": 0.0588, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019948333237844733, |
|
"loss": 0.0166, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.41, |
|
"learning_rate": 0.00019947673542858367, |
|
"loss": 0.2658, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019947009674012973, |
|
"loss": 0.0063, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019946341631587087, |
|
"loss": 0.053, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.0001994566941586101, |
|
"loss": 0.0007, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"learning_rate": 0.00019944993027116797, |
|
"loss": 0.0009, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.0001994431246563824, |
|
"loss": 0.0003, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019943627731710897, |
|
"loss": 0.0008, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019942938825622065, |
|
"loss": 0.0834, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.43, |
|
"learning_rate": 0.00019942245747660796, |
|
"loss": 0.0174, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019941548498117896, |
|
"loss": 0.0963, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.00019940847077285916, |
|
"loss": 0.0361, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001994014148545916, |
|
"loss": 0.0408, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.44, |
|
"learning_rate": 0.0001993943172293368, |
|
"loss": 0.0011, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001993871779000728, |
|
"loss": 0.0801, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.0001993799968697951, |
|
"loss": 0.1176, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019937277414151677, |
|
"loss": 0.1212, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.45, |
|
"learning_rate": 0.00019936550971826834, |
|
"loss": 0.0458, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019935820360309777, |
|
"loss": 0.0543, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019935085579907063, |
|
"loss": 0.0518, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.0001993434663092699, |
|
"loss": 0.0015, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.46, |
|
"learning_rate": 0.00019933603513679605, |
|
"loss": 0.0704, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019932856228476706, |
|
"loss": 0.0628, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019932104775631846, |
|
"loss": 0.0602, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.00019931349155460315, |
|
"loss": 0.0025, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.47, |
|
"learning_rate": 0.0001993058936827916, |
|
"loss": 0.0032, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019929825414407172, |
|
"loss": 0.1927, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.00019929057294164893, |
|
"loss": 0.003, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001992828500787461, |
|
"loss": 0.0027, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.48, |
|
"learning_rate": 0.0001992750855586036, |
|
"loss": 0.0225, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019926727938447933, |
|
"loss": 0.0018, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019925943155964856, |
|
"loss": 0.0019, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.0001992515420874041, |
|
"loss": 0.0015, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"learning_rate": 0.00019924361097105623, |
|
"loss": 0.0411, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.49, |
|
"eval_loss": 0.02292541041970253, |
|
"eval_runtime": 126.0288, |
|
"eval_samples_per_second": 1.016, |
|
"eval_steps_per_second": 0.341, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001992356382139327, |
|
"loss": 0.0512, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019922762381937878, |
|
"loss": 0.0013, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.00019921956779075708, |
|
"loss": 0.0022, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"learning_rate": 0.0001992114701314478, |
|
"loss": 0.0513, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019920333084484857, |
|
"loss": 0.0621, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019919514993437445, |
|
"loss": 0.0032, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019918692740345802, |
|
"loss": 0.0048, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.51, |
|
"learning_rate": 0.00019917866325554938, |
|
"loss": 0.001, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019917035749411586, |
|
"loss": 0.0006, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019916201012264254, |
|
"loss": 0.0666, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.00019915362114463172, |
|
"loss": 0.049, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.52, |
|
"learning_rate": 0.0001991451905636033, |
|
"loss": 0.0005, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019913671838309464, |
|
"loss": 0.0025, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019912820460666044, |
|
"loss": 0.0006, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019911964923787295, |
|
"loss": 0.062, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.53, |
|
"learning_rate": 0.00019911105228032186, |
|
"loss": 0.0012, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019910241373761426, |
|
"loss": 0.0817, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019909373361337476, |
|
"loss": 0.0003, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.00019908501191124534, |
|
"loss": 0.0002, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.54, |
|
"learning_rate": 0.0001990762486348855, |
|
"loss": 0.0434, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019906744378797212, |
|
"loss": 0.0994, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019905859737419956, |
|
"loss": 0.0506, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019904970939727963, |
|
"loss": 0.059, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.55, |
|
"learning_rate": 0.00019904077986094152, |
|
"loss": 0.0042, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019903180876893194, |
|
"loss": 0.0667, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019902279612501493, |
|
"loss": 0.002, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.00019901374193297212, |
|
"loss": 0.0003, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.56, |
|
"learning_rate": 0.0001990046461966024, |
|
"loss": 0.0003, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00019899550891972222, |
|
"loss": 0.0002, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00019898633010616542, |
|
"loss": 0.0025, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.00019897710975978321, |
|
"loss": 0.1237, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.57, |
|
"learning_rate": 0.0001989678478844443, |
|
"loss": 0.0625, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00019895854448403482, |
|
"loss": 0.0569, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00019894919956245824, |
|
"loss": 0.001, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00019893981312363562, |
|
"loss": 0.0846, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.58, |
|
"learning_rate": 0.00019893038517150525, |
|
"loss": 0.0325, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019892091571002297, |
|
"loss": 0.0696, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019891140474316194, |
|
"loss": 0.0006, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019890185227491283, |
|
"loss": 0.0002, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.59, |
|
"learning_rate": 0.00019889225830928365, |
|
"loss": 0.0002, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019888262285029987, |
|
"loss": 0.0008, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019887294590200435, |
|
"loss": 0.0013, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019886322746845737, |
|
"loss": 0.0006, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019885346755373656, |
|
"loss": 0.0004, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6, |
|
"learning_rate": 0.00019884366616193706, |
|
"loss": 0.0468, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019883382329717128, |
|
"loss": 0.0017, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019882393896356913, |
|
"loss": 0.0741, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019881401316527793, |
|
"loss": 0.0004, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.61, |
|
"learning_rate": 0.00019880404590646232, |
|
"loss": 0.1221, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001987940371913044, |
|
"loss": 0.0582, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019878398702400364, |
|
"loss": 0.0689, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.00019877389540877687, |
|
"loss": 0.1289, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.62, |
|
"learning_rate": 0.0001987637623498584, |
|
"loss": 0.0026, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001987535878514998, |
|
"loss": 0.0419, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001987433719179702, |
|
"loss": 0.001, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.0001987331145535559, |
|
"loss": 0.0003, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"learning_rate": 0.00019872281576256077, |
|
"loss": 0.0004, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.000198712475549306, |
|
"loss": 0.0006, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00019870209391813012, |
|
"loss": 0.0495, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.00019869167087338907, |
|
"loss": 0.1258, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.64, |
|
"learning_rate": 0.0001986812064194562, |
|
"loss": 0.0003, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00019867070056072214, |
|
"loss": 0.069, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00019866015330159505, |
|
"loss": 0.1022, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00019864956464650025, |
|
"loss": 0.0012, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.65, |
|
"learning_rate": 0.00019863893459988062, |
|
"loss": 0.1075, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00019862826316619628, |
|
"loss": 0.0002, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.00019861755034992484, |
|
"loss": 0.0004, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001986067961555611, |
|
"loss": 0.0006, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.66, |
|
"learning_rate": 0.0001985960005876174, |
|
"loss": 0.0541, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019858516365062334, |
|
"loss": 0.1308, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019857428534912587, |
|
"loss": 0.0062, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019856336568768935, |
|
"loss": 0.0382, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"learning_rate": 0.00019855240467089543, |
|
"loss": 0.0003, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00019854140230334322, |
|
"loss": 0.0043, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00019853035858964906, |
|
"loss": 0.0373, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.0001985192735344467, |
|
"loss": 0.0516, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.68, |
|
"learning_rate": 0.00019850814714238716, |
|
"loss": 0.0004, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00019849697941813898, |
|
"loss": 0.1028, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00019848577036638788, |
|
"loss": 0.2622, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00019847451999183694, |
|
"loss": 0.0004, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.69, |
|
"learning_rate": 0.00019846322829920662, |
|
"loss": 0.0006, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00019845189529323475, |
|
"loss": 0.1482, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00019844052097867638, |
|
"loss": 0.038, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00019842910536030403, |
|
"loss": 0.0107, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7, |
|
"learning_rate": 0.00019841764844290744, |
|
"loss": 0.035, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00019840615023129372, |
|
"loss": 0.035, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00019839461073028732, |
|
"loss": 0.0033, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00019838302994472997, |
|
"loss": 0.0442, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.71, |
|
"learning_rate": 0.00019837140787948082, |
|
"loss": 0.0135, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.0001983597445394162, |
|
"loss": 0.0014, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019834803992942987, |
|
"loss": 0.1629, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019833629405443284, |
|
"loss": 0.0867, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.72, |
|
"learning_rate": 0.00019832450691935352, |
|
"loss": 0.0388, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001983126785291375, |
|
"loss": 0.0014, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00019830080888874778, |
|
"loss": 0.0028, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.00019828889800316466, |
|
"loss": 0.1433, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.73, |
|
"learning_rate": 0.0001982769458773857, |
|
"loss": 0.1458, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00019826495251642578, |
|
"loss": 0.0005, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00019825291792531716, |
|
"loss": 0.0114, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.00019824084210910925, |
|
"loss": 0.0508, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.74, |
|
"learning_rate": 0.0001982287250728689, |
|
"loss": 0.0006, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00019821656682168012, |
|
"loss": 0.0377, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00019820436736064435, |
|
"loss": 0.0383, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00019819212669488026, |
|
"loss": 0.0052, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"learning_rate": 0.00019817984482952376, |
|
"loss": 0.0008, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00019816752176972813, |
|
"loss": 0.0619, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00019815515752066387, |
|
"loss": 0.0866, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.0001981427520875188, |
|
"loss": 0.002, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.76, |
|
"learning_rate": 0.00019813030547549803, |
|
"loss": 0.0405, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.0001981178176898239, |
|
"loss": 0.0656, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00019810528873573607, |
|
"loss": 0.0043, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00019809271861849145, |
|
"loss": 0.0045, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.77, |
|
"learning_rate": 0.00019808010734336423, |
|
"loss": 0.0021, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00019806745491564586, |
|
"loss": 0.001, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.00019805476134064507, |
|
"loss": 0.09, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001980420266236878, |
|
"loss": 0.0011, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.78, |
|
"learning_rate": 0.0001980292507701174, |
|
"loss": 0.0013, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.0001980164337852943, |
|
"loss": 0.0007, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00019800357567459633, |
|
"loss": 0.1487, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00019799067644341844, |
|
"loss": 0.098, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.79, |
|
"learning_rate": 0.00019797773609717297, |
|
"loss": 0.0006, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019796475464128942, |
|
"loss": 0.0469, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019795173208121458, |
|
"loss": 0.0003, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019793866842241243, |
|
"loss": 0.0424, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019792556367036432, |
|
"loss": 0.0003, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8, |
|
"learning_rate": 0.00019791241783056874, |
|
"loss": 0.0003, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00019789923090854136, |
|
"loss": 0.0243, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00019788600290981525, |
|
"loss": 0.0383, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.00019787273383994062, |
|
"loss": 0.0005, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.81, |
|
"learning_rate": 0.0001978594237044849, |
|
"loss": 0.0329, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00019784607250903277, |
|
"loss": 0.0013, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.0001978326802591862, |
|
"loss": 0.0363, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00019781924696056426, |
|
"loss": 0.1015, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.82, |
|
"learning_rate": 0.00019780577261880336, |
|
"loss": 0.0383, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00019779225723955707, |
|
"loss": 0.001, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.0001977787008284962, |
|
"loss": 0.0498, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00019776510339130873, |
|
"loss": 0.095, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"learning_rate": 0.00019775146493369994, |
|
"loss": 0.0004, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00019773778546139227, |
|
"loss": 0.0002, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.0001977240649801253, |
|
"loss": 0.0003, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.000197710303495656, |
|
"loss": 0.0011, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"learning_rate": 0.00019769650101375837, |
|
"loss": 0.0017, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00019768265754022365, |
|
"loss": 0.0576, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00019766877308086036, |
|
"loss": 0.0002, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00019765484764149415, |
|
"loss": 0.0175, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.85, |
|
"learning_rate": 0.00019764088122796783, |
|
"loss": 0.0002, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001976268738461415, |
|
"loss": 0.0008, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001976128255018924, |
|
"loss": 0.0006, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.0001975987362011149, |
|
"loss": 0.1749, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.86, |
|
"learning_rate": 0.00019758460594972068, |
|
"loss": 0.0748, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00019757043475363847, |
|
"loss": 0.0698, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00019755622261881427, |
|
"loss": 0.0002, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00019754196955121123, |
|
"loss": 0.0004, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.87, |
|
"learning_rate": 0.00019752767555680968, |
|
"loss": 0.0491, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00019751334064160706, |
|
"loss": 0.0708, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00019749896481161808, |
|
"loss": 0.0673, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00019748454807287457, |
|
"loss": 0.0002, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.88, |
|
"learning_rate": 0.00019747009043142555, |
|
"loss": 0.0689, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.0001974555918933371, |
|
"loss": 0.001, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00019744105246469263, |
|
"loss": 0.0496, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00019742647215159254, |
|
"loss": 0.0034, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.89, |
|
"learning_rate": 0.00019741185096015448, |
|
"loss": 0.0498, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00019739718889651327, |
|
"loss": 0.02, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00019738248596682078, |
|
"loss": 0.0358, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.00019736774217724614, |
|
"loss": 0.0929, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9, |
|
"learning_rate": 0.0001973529575339755, |
|
"loss": 0.0001, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00019733813204321233, |
|
"loss": 0.0018, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00019732326571117703, |
|
"loss": 0.0017, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00019730835854410726, |
|
"loss": 0.0001, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.91, |
|
"learning_rate": 0.00019729341054825782, |
|
"loss": 0.0688, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.0001972784217299006, |
|
"loss": 0.0004, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00019726339209532462, |
|
"loss": 0.0652, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00019724832165083603, |
|
"loss": 0.002, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.92, |
|
"learning_rate": 0.00019723321040275815, |
|
"loss": 0.0565, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00019721805835743134, |
|
"loss": 0.0345, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.0001972028655212131, |
|
"loss": 0.0005, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00019718763190047808, |
|
"loss": 0.0726, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.93, |
|
"learning_rate": 0.00019717235750161806, |
|
"loss": 0.08, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00019715704233104185, |
|
"loss": 0.0486, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00019714168639517544, |
|
"loss": 0.0006, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00019712628970046189, |
|
"loss": 0.0001, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.94, |
|
"learning_rate": 0.00019711085225336132, |
|
"loss": 0.0653, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00019709537406035105, |
|
"loss": 0.1022, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00019707985512792543, |
|
"loss": 0.0005, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00019706429546259593, |
|
"loss": 0.0345, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 0.95, |
|
"learning_rate": 0.00019704869507089105, |
|
"loss": 0.0003, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00019703305395935648, |
|
"loss": 0.0231, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.0001970173721345549, |
|
"loss": 0.2725, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00019700164960306614, |
|
"loss": 0.0806, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 0.96, |
|
"learning_rate": 0.00019698588637148703, |
|
"loss": 0.086, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.0001969700824464316, |
|
"loss": 0.0044, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00019695423783453088, |
|
"loss": 0.0033, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00019693835254243287, |
|
"loss": 0.0028, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 0.97, |
|
"learning_rate": 0.00019692242657680286, |
|
"loss": 0.0513, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00019690645994432305, |
|
"loss": 0.0038, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.00019689045265169273, |
|
"loss": 0.0032, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001968744047056283, |
|
"loss": 0.0026, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.98, |
|
"learning_rate": 0.0001968583161128631, |
|
"loss": 0.0099, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00019684218688014772, |
|
"loss": 0.0675, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.0001968260170142496, |
|
"loss": 0.0425, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"eval_loss": 0.019980641081929207, |
|
"eval_runtime": 126.04, |
|
"eval_samples_per_second": 1.016, |
|
"eval_steps_per_second": 0.341, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00019680980652195333, |
|
"loss": 0.0016, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 0.99, |
|
"learning_rate": 0.00019679355541006054, |
|
"loss": 0.0017, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001967772636853899, |
|
"loss": 0.0009, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00019676093135477713, |
|
"loss": 0.095, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00019674455842507492, |
|
"loss": 0.0646, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.0001967281449031531, |
|
"loss": 0.0014, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"learning_rate": 0.00019671169079589848, |
|
"loss": 0.001, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019669519611021486, |
|
"loss": 0.0223, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019667866085302312, |
|
"loss": 0.0006, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019666208503126112, |
|
"loss": 0.0609, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 0.00019664546865188386, |
|
"loss": 0.1217, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019662881172186313, |
|
"loss": 0.0006, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019661211424818798, |
|
"loss": 0.0005, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019659537623786428, |
|
"loss": 0.0518, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.02, |
|
"learning_rate": 0.00019657859769791505, |
|
"loss": 0.0364, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019656177863538026, |
|
"loss": 0.0003, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001965449190573168, |
|
"loss": 0.0604, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.00019652801897079869, |
|
"loss": 0.0569, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.03, |
|
"learning_rate": 0.0001965110783829169, |
|
"loss": 0.0875, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019649409730077935, |
|
"loss": 0.0554, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019647707573151098, |
|
"loss": 0.0004, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019646001368225382, |
|
"loss": 0.035, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 0.00019644291116016667, |
|
"loss": 0.0007, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001964257681724255, |
|
"loss": 0.0064, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019640858472622316, |
|
"loss": 0.0008, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.00019639136082876953, |
|
"loss": 0.0009, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.05, |
|
"learning_rate": 0.0001963740964872914, |
|
"loss": 0.0027, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019635679170903258, |
|
"loss": 0.0356, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019633944650125388, |
|
"loss": 0.0013, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019632206087123296, |
|
"loss": 0.0179, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.06, |
|
"learning_rate": 0.00019630463482626454, |
|
"loss": 0.0477, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019628716837366027, |
|
"loss": 0.0004, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019626966152074874, |
|
"loss": 0.0795, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019625211427487548, |
|
"loss": 0.1192, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 0.00019623452664340306, |
|
"loss": 0.0606, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019621689863371083, |
|
"loss": 0.0004, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.0001961992302531952, |
|
"loss": 0.1189, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019618152150926955, |
|
"loss": 0.0015, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.08, |
|
"learning_rate": 0.00019616377240936407, |
|
"loss": 0.0004, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000196145982960926, |
|
"loss": 0.0004, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019612815317141945, |
|
"loss": 0.0003, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.00019611028304832546, |
|
"loss": 0.0003, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.09, |
|
"learning_rate": 0.000196092372599142, |
|
"loss": 0.0153, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.000196074421831384, |
|
"loss": 0.0725, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019605643075258321, |
|
"loss": 0.0005, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019603839937028838, |
|
"loss": 0.0005, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.1, |
|
"learning_rate": 0.00019602032769206517, |
|
"loss": 0.0003, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019600221572549606, |
|
"loss": 0.0416, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019598406347818054, |
|
"loss": 0.0712, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019596587095773495, |
|
"loss": 0.0005, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 0.00019594763817179254, |
|
"loss": 0.0095, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00019592936512800342, |
|
"loss": 0.0294, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00019591105183403462, |
|
"loss": 0.0164, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.00019589269829757008, |
|
"loss": 0.0021, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.12, |
|
"learning_rate": 0.0001958743045263106, |
|
"loss": 0.0017, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019585587052797389, |
|
"loss": 0.0439, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019583739631029445, |
|
"loss": 0.0014, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.00019581888188102375, |
|
"loss": 0.0944, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.13, |
|
"learning_rate": 0.0001958003272479301, |
|
"loss": 0.0444, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00019578173241879872, |
|
"loss": 0.0063, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001957630974014316, |
|
"loss": 0.0054, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.00019574442220364767, |
|
"loss": 0.0567, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 0.0001957257068332827, |
|
"loss": 0.0019, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00019570695129818926, |
|
"loss": 0.0355, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001956881556062369, |
|
"loss": 0.0891, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.0001956693197653119, |
|
"loss": 0.0026, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.15, |
|
"learning_rate": 0.00019565044378331745, |
|
"loss": 0.0609, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019563152766817354, |
|
"loss": 0.1015, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019561257142781705, |
|
"loss": 0.0434, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.00019559357507020162, |
|
"loss": 0.0016, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.16, |
|
"learning_rate": 0.0001955745386032978, |
|
"loss": 0.0339, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019555546203509297, |
|
"loss": 0.0305, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019553634537359122, |
|
"loss": 0.0003, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019551718862681364, |
|
"loss": 0.001, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 0.00019549799180279792, |
|
"loss": 0.0016, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019547875490959885, |
|
"loss": 0.0265, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019545947795528777, |
|
"loss": 0.0248, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019544016094795295, |
|
"loss": 0.0443, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.18, |
|
"learning_rate": 0.00019542080389569946, |
|
"loss": 0.0005, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019540140680664913, |
|
"loss": 0.0005, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019538196968894067, |
|
"loss": 0.0001, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019536249255072948, |
|
"loss": 0.0002, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.19, |
|
"learning_rate": 0.00019534297540018785, |
|
"loss": 0.0002, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.00019532341824550479, |
|
"loss": 0.0526, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001953038210948861, |
|
"loss": 0.0002, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001952841839565544, |
|
"loss": 0.0631, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001952645068387491, |
|
"loss": 0.0252, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.2, |
|
"learning_rate": 0.0001952447897497263, |
|
"loss": 0.0002, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019522503269775899, |
|
"loss": 0.056, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019520523569113677, |
|
"loss": 0.0001, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019518539873816617, |
|
"loss": 0.0591, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 0.00019516552184717037, |
|
"loss": 0.0257, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00019514560502648936, |
|
"loss": 0.0989, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00019512564828447988, |
|
"loss": 0.0679, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.00019510565162951537, |
|
"loss": 0.0002, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.22, |
|
"learning_rate": 0.0001950856150699861, |
|
"loss": 0.0662, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019506553861429898, |
|
"loss": 0.0004, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001950454222708778, |
|
"loss": 0.0464, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.00019502526604816295, |
|
"loss": 0.0657, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.23, |
|
"learning_rate": 0.0001950050699546116, |
|
"loss": 0.0001, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019498483399869767, |
|
"loss": 0.0327, |
|
"step": 501 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.0001949645581889118, |
|
"loss": 0.0701, |
|
"step": 502 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019494424253376134, |
|
"loss": 0.0003, |
|
"step": 503 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 0.00019492388704177036, |
|
"loss": 0.0001, |
|
"step": 504 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019490349172147963, |
|
"loss": 0.0002, |
|
"step": 505 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019488305658144667, |
|
"loss": 0.0633, |
|
"step": 506 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019486258163024567, |
|
"loss": 0.0066, |
|
"step": 507 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"learning_rate": 0.00019484206687646753, |
|
"loss": 0.0001, |
|
"step": 508 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.0001948215123287199, |
|
"loss": 0.0001, |
|
"step": 509 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00019480091799562704, |
|
"loss": 0.003, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00019478028388583, |
|
"loss": 0.0005, |
|
"step": 511 |
|
}, |
|
{ |
|
"epoch": 1.26, |
|
"learning_rate": 0.00019475961000798645, |
|
"loss": 0.0002, |
|
"step": 512 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00019473889637077073, |
|
"loss": 0.0002, |
|
"step": 513 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.0001947181429828739, |
|
"loss": 0.0594, |
|
"step": 514 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00019469734985300371, |
|
"loss": 0.0003, |
|
"step": 515 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 0.00019467651698988462, |
|
"loss": 0.0012, |
|
"step": 516 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019465564440225767, |
|
"loss": 0.0003, |
|
"step": 517 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.0001946347320988806, |
|
"loss": 0.0001, |
|
"step": 518 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019461378008852785, |
|
"loss": 0.0029, |
|
"step": 519 |
|
}, |
|
{ |
|
"epoch": 1.28, |
|
"learning_rate": 0.00019459278837999046, |
|
"loss": 0.0017, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.0001945717569820762, |
|
"loss": 0.009, |
|
"step": 521 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00019455068590360942, |
|
"loss": 0.0014, |
|
"step": 522 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00019452957515343118, |
|
"loss": 0.0007, |
|
"step": 523 |
|
}, |
|
{ |
|
"epoch": 1.29, |
|
"learning_rate": 0.00019450842474039913, |
|
"loss": 0.0011, |
|
"step": 524 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00019448723467338763, |
|
"loss": 0.0279, |
|
"step": 525 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00019446600496128758, |
|
"loss": 0.0443, |
|
"step": 526 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00019444473561300668, |
|
"loss": 0.0261, |
|
"step": 527 |
|
}, |
|
{ |
|
"epoch": 1.3, |
|
"learning_rate": 0.00019442342663746902, |
|
"loss": 0.06, |
|
"step": 528 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019440207804361553, |
|
"loss": 0.0002, |
|
"step": 529 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019438068984040365, |
|
"loss": 0.0514, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.0001943592620368075, |
|
"loss": 0.0004, |
|
"step": 531 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 0.00019433779464181778, |
|
"loss": 0.0593, |
|
"step": 532 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00019431628766444182, |
|
"loss": 0.0122, |
|
"step": 533 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00019429474111370352, |
|
"loss": 0.0393, |
|
"step": 534 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.00019427315499864344, |
|
"loss": 0.062, |
|
"step": 535 |
|
}, |
|
{ |
|
"epoch": 1.32, |
|
"learning_rate": 0.0001942515293283187, |
|
"loss": 0.0549, |
|
"step": 536 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.000194229864111803, |
|
"loss": 0.0005, |
|
"step": 537 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019420815935818672, |
|
"loss": 0.0004, |
|
"step": 538 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019418641507657673, |
|
"loss": 0.0561, |
|
"step": 539 |
|
}, |
|
{ |
|
"epoch": 1.33, |
|
"learning_rate": 0.00019416463127609656, |
|
"loss": 0.0574, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00019414280796588624, |
|
"loss": 0.0573, |
|
"step": 541 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00019412094515510248, |
|
"loss": 0.0012, |
|
"step": 542 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.0001940990428529185, |
|
"loss": 0.0064, |
|
"step": 543 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 0.00019407710106852404, |
|
"loss": 0.0661, |
|
"step": 544 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.0001940551198111255, |
|
"loss": 0.0512, |
|
"step": 545 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00019403309908994586, |
|
"loss": 0.001, |
|
"step": 546 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00019401103891422455, |
|
"loss": 0.042, |
|
"step": 547 |
|
}, |
|
{ |
|
"epoch": 1.35, |
|
"learning_rate": 0.00019398893929321761, |
|
"loss": 0.0537, |
|
"step": 548 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00019396680023619765, |
|
"loss": 0.2087, |
|
"step": 549 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00019394462175245381, |
|
"loss": 0.0287, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00019392240385129173, |
|
"loss": 0.0028, |
|
"step": 551 |
|
}, |
|
{ |
|
"epoch": 1.36, |
|
"learning_rate": 0.00019390014654203369, |
|
"loss": 0.0006, |
|
"step": 552 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00019387784983401838, |
|
"loss": 0.0008, |
|
"step": 553 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001938555137366011, |
|
"loss": 0.0403, |
|
"step": 554 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.0001938331382591537, |
|
"loss": 0.0014, |
|
"step": 555 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 0.00019381072341106452, |
|
"loss": 0.001, |
|
"step": 556 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00019378826920173837, |
|
"loss": 0.0037, |
|
"step": 557 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.0001937657756405966, |
|
"loss": 0.0008, |
|
"step": 558 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00019374324273707715, |
|
"loss": 0.0008, |
|
"step": 559 |
|
}, |
|
{ |
|
"epoch": 1.38, |
|
"learning_rate": 0.00019372067050063438, |
|
"loss": 0.0675, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00019369805894073919, |
|
"loss": 0.041, |
|
"step": 561 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00019367540806687893, |
|
"loss": 0.0299, |
|
"step": 562 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.00019365271788855757, |
|
"loss": 0.0006, |
|
"step": 563 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"learning_rate": 0.0001936299884152954, |
|
"loss": 0.0125, |
|
"step": 564 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019360721965662933, |
|
"loss": 0.032, |
|
"step": 565 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.0001935844116221127, |
|
"loss": 0.0252, |
|
"step": 566 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019356156432131534, |
|
"loss": 0.0639, |
|
"step": 567 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019353867776382354, |
|
"loss": 0.0005, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 1.4, |
|
"learning_rate": 0.00019351575195924013, |
|
"loss": 0.0014, |
|
"step": 569 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00019349278691718427, |
|
"loss": 0.0019, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00019346978264729172, |
|
"loss": 0.1464, |
|
"step": 571 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.0001934467391592146, |
|
"loss": 0.0019, |
|
"step": 572 |
|
}, |
|
{ |
|
"epoch": 1.41, |
|
"learning_rate": 0.00019342365646262156, |
|
"loss": 0.0376, |
|
"step": 573 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00019340053456719768, |
|
"loss": 0.0007, |
|
"step": 574 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00019337737348264447, |
|
"loss": 0.0007, |
|
"step": 575 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.00019335417321867987, |
|
"loss": 0.001, |
|
"step": 576 |
|
}, |
|
{ |
|
"epoch": 1.42, |
|
"learning_rate": 0.0001933309337850383, |
|
"loss": 0.0108, |
|
"step": 577 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.0001933076551914706, |
|
"loss": 0.0225, |
|
"step": 578 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.000193284337447744, |
|
"loss": 0.063, |
|
"step": 579 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00019326098056364222, |
|
"loss": 0.0476, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 1.43, |
|
"learning_rate": 0.00019323758454896538, |
|
"loss": 0.0011, |
|
"step": 581 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00019321414941353003, |
|
"loss": 0.0003, |
|
"step": 582 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.0001931906751671691, |
|
"loss": 0.0262, |
|
"step": 583 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00019316716181973188, |
|
"loss": 0.0005, |
|
"step": 584 |
|
}, |
|
{ |
|
"epoch": 1.44, |
|
"learning_rate": 0.00019314360938108425, |
|
"loss": 0.084, |
|
"step": 585 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00019312001786110828, |
|
"loss": 0.0512, |
|
"step": 586 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.0001930963872697026, |
|
"loss": 0.0235, |
|
"step": 587 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00019307271761678213, |
|
"loss": 0.0002, |
|
"step": 588 |
|
}, |
|
{ |
|
"epoch": 1.45, |
|
"learning_rate": 0.00019304900891227824, |
|
"loss": 0.0004, |
|
"step": 589 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019302526116613864, |
|
"loss": 0.0491, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019300147438832744, |
|
"loss": 0.0595, |
|
"step": 591 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019297764858882514, |
|
"loss": 0.0005, |
|
"step": 592 |
|
}, |
|
{ |
|
"epoch": 1.46, |
|
"learning_rate": 0.00019295378377762862, |
|
"loss": 0.0693, |
|
"step": 593 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00019292987996475113, |
|
"loss": 0.0024, |
|
"step": 594 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00019290593716022217, |
|
"loss": 0.001, |
|
"step": 595 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.0001928819553740878, |
|
"loss": 0.0134, |
|
"step": 596 |
|
}, |
|
{ |
|
"epoch": 1.47, |
|
"learning_rate": 0.00019285793461641028, |
|
"loss": 0.0295, |
|
"step": 597 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00019283387489726827, |
|
"loss": 0.0006, |
|
"step": 598 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.0001928097762267568, |
|
"loss": 0.0009, |
|
"step": 599 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00019278563861498723, |
|
"loss": 0.0465, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"eval_loss": 0.017965465784072876, |
|
"eval_runtime": 126.0107, |
|
"eval_samples_per_second": 1.016, |
|
"eval_steps_per_second": 0.341, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 1.48, |
|
"learning_rate": 0.00019276146207208728, |
|
"loss": 0.039, |
|
"step": 601 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019273724660820088, |
|
"loss": 0.0005, |
|
"step": 602 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019271299223348848, |
|
"loss": 0.071, |
|
"step": 603 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019268869895812672, |
|
"loss": 0.0007, |
|
"step": 604 |
|
}, |
|
{ |
|
"epoch": 1.49, |
|
"learning_rate": 0.00019266436679230865, |
|
"loss": 0.1072, |
|
"step": 605 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00019263999574624355, |
|
"loss": 0.0503, |
|
"step": 606 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00019261558583015707, |
|
"loss": 0.0451, |
|
"step": 607 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.0001925911370542912, |
|
"loss": 0.0005, |
|
"step": 608 |
|
}, |
|
{ |
|
"epoch": 1.5, |
|
"learning_rate": 0.00019256664942890413, |
|
"loss": 0.0202, |
|
"step": 609 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019254212296427044, |
|
"loss": 0.0401, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019251755767068097, |
|
"loss": 0.1015, |
|
"step": 611 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019249295355844285, |
|
"loss": 0.0009, |
|
"step": 612 |
|
}, |
|
{ |
|
"epoch": 1.51, |
|
"learning_rate": 0.00019246831063787957, |
|
"loss": 0.0007, |
|
"step": 613 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019244362891933077, |
|
"loss": 0.0007, |
|
"step": 614 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019241890841315248, |
|
"loss": 0.0726, |
|
"step": 615 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.00019239414912971696, |
|
"loss": 0.0022, |
|
"step": 616 |
|
}, |
|
{ |
|
"epoch": 1.52, |
|
"learning_rate": 0.0001923693510794127, |
|
"loss": 0.1312, |
|
"step": 617 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001923445142726446, |
|
"loss": 0.0742, |
|
"step": 618 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00019231963871983366, |
|
"loss": 0.043, |
|
"step": 619 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.0001922947244314172, |
|
"loss": 0.0899, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 1.53, |
|
"learning_rate": 0.00019226977141784875, |
|
"loss": 0.0004, |
|
"step": 621 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001922447796895982, |
|
"loss": 0.0003, |
|
"step": 622 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001922197492571516, |
|
"loss": 0.0003, |
|
"step": 623 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.00019219468013101124, |
|
"loss": 0.0015, |
|
"step": 624 |
|
}, |
|
{ |
|
"epoch": 1.54, |
|
"learning_rate": 0.0001921695723216957, |
|
"loss": 0.0003, |
|
"step": 625 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00019214442583973966, |
|
"loss": 0.001, |
|
"step": 626 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.0001921192406956942, |
|
"loss": 0.0004, |
|
"step": 627 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00019209401690012653, |
|
"loss": 0.0651, |
|
"step": 628 |
|
}, |
|
{ |
|
"epoch": 1.55, |
|
"learning_rate": 0.00019206875446362001, |
|
"loss": 0.0938, |
|
"step": 629 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00019204345339677442, |
|
"loss": 0.0486, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001920181137102055, |
|
"loss": 0.0281, |
|
"step": 631 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.00019199273541454538, |
|
"loss": 0.0003, |
|
"step": 632 |
|
}, |
|
{ |
|
"epoch": 1.56, |
|
"learning_rate": 0.0001919673185204423, |
|
"loss": 0.0001, |
|
"step": 633 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019194186303856067, |
|
"loss": 0.0002, |
|
"step": 634 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019191636897958122, |
|
"loss": 0.0003, |
|
"step": 635 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019189083635420075, |
|
"loss": 0.0004, |
|
"step": 636 |
|
}, |
|
{ |
|
"epoch": 1.57, |
|
"learning_rate": 0.00019186526517313225, |
|
"loss": 0.0003, |
|
"step": 637 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019183965544710495, |
|
"loss": 0.0611, |
|
"step": 638 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.0001918140071868642, |
|
"loss": 0.0528, |
|
"step": 639 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019178832040317155, |
|
"loss": 0.0948, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 1.58, |
|
"learning_rate": 0.00019176259510680463, |
|
"loss": 0.0002, |
|
"step": 641 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001917368313085574, |
|
"loss": 0.0003, |
|
"step": 642 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001917110290192398, |
|
"loss": 0.0005, |
|
"step": 643 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.00019168518824967795, |
|
"loss": 0.0415, |
|
"step": 644 |
|
}, |
|
{ |
|
"epoch": 1.59, |
|
"learning_rate": 0.0001916593090107143, |
|
"loss": 0.0259, |
|
"step": 645 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019163339131320718, |
|
"loss": 0.0261, |
|
"step": 646 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.0001916074351680312, |
|
"loss": 0.0001, |
|
"step": 647 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019158144058607708, |
|
"loss": 0.0001, |
|
"step": 648 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019155540757825168, |
|
"loss": 0.0002, |
|
"step": 649 |
|
}, |
|
{ |
|
"epoch": 1.6, |
|
"learning_rate": 0.00019152933615547798, |
|
"loss": 0.031, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00019150322632869497, |
|
"loss": 0.0002, |
|
"step": 651 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.000191477078108858, |
|
"loss": 0.0405, |
|
"step": 652 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00019145089150693822, |
|
"loss": 0.0651, |
|
"step": 653 |
|
}, |
|
{ |
|
"epoch": 1.61, |
|
"learning_rate": 0.00019142466653392318, |
|
"loss": 0.0001, |
|
"step": 654 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001913984032008163, |
|
"loss": 0.0903, |
|
"step": 655 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001913721015186372, |
|
"loss": 0.0001, |
|
"step": 656 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.00019134576149842163, |
|
"loss": 0.0271, |
|
"step": 657 |
|
}, |
|
{ |
|
"epoch": 1.62, |
|
"learning_rate": 0.0001913193831512213, |
|
"loss": 0.0002, |
|
"step": 658 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00019129296648810412, |
|
"loss": 0.0406, |
|
"step": 659 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00019126651152015403, |
|
"loss": 0.0484, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.00019124001825847103, |
|
"loss": 0.0004, |
|
"step": 661 |
|
}, |
|
{ |
|
"epoch": 1.63, |
|
"learning_rate": 0.0001912134867141712, |
|
"loss": 0.0509, |
|
"step": 662 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.00019118691689838668, |
|
"loss": 0.0436, |
|
"step": 663 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001911603088222657, |
|
"loss": 0.0358, |
|
"step": 664 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001911336624969725, |
|
"loss": 0.0447, |
|
"step": 665 |
|
}, |
|
{ |
|
"epoch": 1.64, |
|
"learning_rate": 0.0001911069779336873, |
|
"loss": 0.0439, |
|
"step": 666 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00019108025514360662, |
|
"loss": 0.0617, |
|
"step": 667 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00019105349413794272, |
|
"loss": 0.0001, |
|
"step": 668 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00019102669492792405, |
|
"loss": 0.0001, |
|
"step": 669 |
|
}, |
|
{ |
|
"epoch": 1.65, |
|
"learning_rate": 0.00019099985752479506, |
|
"loss": 0.0001, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00019097298193981624, |
|
"loss": 0.0224, |
|
"step": 671 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00019094606818426403, |
|
"loss": 0.0407, |
|
"step": 672 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00019091911626943102, |
|
"loss": 0.0632, |
|
"step": 673 |
|
}, |
|
{ |
|
"epoch": 1.66, |
|
"learning_rate": 0.00019089212620662568, |
|
"loss": 0.0505, |
|
"step": 674 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00019086509800717258, |
|
"loss": 0.0001, |
|
"step": 675 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00019083803168241223, |
|
"loss": 0.0465, |
|
"step": 676 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00019081092724370114, |
|
"loss": 0.0001, |
|
"step": 677 |
|
}, |
|
{ |
|
"epoch": 1.67, |
|
"learning_rate": 0.00019078378470241183, |
|
"loss": 0.0312, |
|
"step": 678 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00019075660406993284, |
|
"loss": 0.0892, |
|
"step": 679 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00019072938535766865, |
|
"loss": 0.0007, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00019070212857703967, |
|
"loss": 0.0002, |
|
"step": 681 |
|
}, |
|
{ |
|
"epoch": 1.68, |
|
"learning_rate": 0.00019067483373948243, |
|
"loss": 0.0005, |
|
"step": 682 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00019064750085644926, |
|
"loss": 0.0545, |
|
"step": 683 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00019062012993940859, |
|
"loss": 0.0501, |
|
"step": 684 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.0001905927209998447, |
|
"loss": 0.0001, |
|
"step": 685 |
|
}, |
|
{ |
|
"epoch": 1.69, |
|
"learning_rate": 0.00019056527404925789, |
|
"loss": 0.1453, |
|
"step": 686 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00019053778909916438, |
|
"loss": 0.0001, |
|
"step": 687 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00019051026616109638, |
|
"loss": 0.0962, |
|
"step": 688 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.00019048270524660196, |
|
"loss": 0.0436, |
|
"step": 689 |
|
}, |
|
{ |
|
"epoch": 1.7, |
|
"learning_rate": 0.0001904551063672452, |
|
"loss": 0.1079, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019042746953460606, |
|
"loss": 0.0687, |
|
"step": 691 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019039979476028043, |
|
"loss": 0.0853, |
|
"step": 692 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.00019037208205588017, |
|
"loss": 0.0943, |
|
"step": 693 |
|
}, |
|
{ |
|
"epoch": 1.71, |
|
"learning_rate": 0.000190344331433033, |
|
"loss": 0.0001, |
|
"step": 694 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019031654290338254, |
|
"loss": 0.0375, |
|
"step": 695 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019028871647858834, |
|
"loss": 0.0001, |
|
"step": 696 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019026085217032593, |
|
"loss": 0.0001, |
|
"step": 697 |
|
}, |
|
{ |
|
"epoch": 1.72, |
|
"learning_rate": 0.00019023294999028653, |
|
"loss": 0.1158, |
|
"step": 698 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019020500995017747, |
|
"loss": 0.0398, |
|
"step": 699 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019017703206172185, |
|
"loss": 0.0001, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.00019014901633665867, |
|
"loss": 0.0001, |
|
"step": 701 |
|
}, |
|
{ |
|
"epoch": 1.73, |
|
"learning_rate": 0.0001901209627867428, |
|
"loss": 0.0473, |
|
"step": 702 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.000190092871423745, |
|
"loss": 0.0528, |
|
"step": 703 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001900647422594519, |
|
"loss": 0.0581, |
|
"step": 704 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001900365753056659, |
|
"loss": 0.0434, |
|
"step": 705 |
|
}, |
|
{ |
|
"epoch": 1.74, |
|
"learning_rate": 0.0001900083705742054, |
|
"loss": 0.0001, |
|
"step": 706 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018998012807690457, |
|
"loss": 0.0886, |
|
"step": 707 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018995184782561345, |
|
"loss": 0.0391, |
|
"step": 708 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018992352983219785, |
|
"loss": 0.0006, |
|
"step": 709 |
|
}, |
|
{ |
|
"epoch": 1.75, |
|
"learning_rate": 0.00018989517410853955, |
|
"loss": 0.0002, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018986678066653601, |
|
"loss": 0.0001, |
|
"step": 711 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.0001898383495181007, |
|
"loss": 0.0006, |
|
"step": 712 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018980988067516266, |
|
"loss": 0.0539, |
|
"step": 713 |
|
}, |
|
{ |
|
"epoch": 1.76, |
|
"learning_rate": 0.00018978137414966698, |
|
"loss": 0.0211, |
|
"step": 714 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00018975282995357446, |
|
"loss": 0.0355, |
|
"step": 715 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001897242480988617, |
|
"loss": 0.0532, |
|
"step": 716 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.0001896956285975211, |
|
"loss": 0.0354, |
|
"step": 717 |
|
}, |
|
{ |
|
"epoch": 1.77, |
|
"learning_rate": 0.00018966697146156092, |
|
"loss": 0.0004, |
|
"step": 718 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001896382767030051, |
|
"loss": 0.0418, |
|
"step": 719 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00018960954433389345, |
|
"loss": 0.0502, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.00018958077436628158, |
|
"loss": 0.0004, |
|
"step": 721 |
|
}, |
|
{ |
|
"epoch": 1.78, |
|
"learning_rate": 0.0001895519668122408, |
|
"loss": 0.0001, |
|
"step": 722 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00018952312168385823, |
|
"loss": 0.0509, |
|
"step": 723 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.0001894942389932367, |
|
"loss": 0.0003, |
|
"step": 724 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00018946531875249493, |
|
"loss": 0.0006, |
|
"step": 725 |
|
}, |
|
{ |
|
"epoch": 1.79, |
|
"learning_rate": 0.00018943636097376726, |
|
"loss": 0.019, |
|
"step": 726 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00018940736566920387, |
|
"loss": 0.0002, |
|
"step": 727 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00018937833285097066, |
|
"loss": 0.0004, |
|
"step": 728 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00018934926253124921, |
|
"loss": 0.0258, |
|
"step": 729 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.00018932015472223693, |
|
"loss": 0.0001, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 1.8, |
|
"learning_rate": 0.0001892910094361469, |
|
"loss": 0.0008, |
|
"step": 731 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00018926182668520792, |
|
"loss": 0.0479, |
|
"step": 732 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00018923260648166457, |
|
"loss": 0.0008, |
|
"step": 733 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.0001892033488377771, |
|
"loss": 0.1391, |
|
"step": 734 |
|
}, |
|
{ |
|
"epoch": 1.81, |
|
"learning_rate": 0.00018917405376582145, |
|
"loss": 0.0029, |
|
"step": 735 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001891447212780893, |
|
"loss": 0.0002, |
|
"step": 736 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00018911535138688802, |
|
"loss": 0.0002, |
|
"step": 737 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.0001890859441045407, |
|
"loss": 0.0877, |
|
"step": 738 |
|
}, |
|
{ |
|
"epoch": 1.82, |
|
"learning_rate": 0.00018905649944338598, |
|
"loss": 0.0014, |
|
"step": 739 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.0001890270174157784, |
|
"loss": 0.0006, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00018899749803408806, |
|
"loss": 0.1397, |
|
"step": 741 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00018896794131070073, |
|
"loss": 0.1003, |
|
"step": 742 |
|
}, |
|
{ |
|
"epoch": 1.83, |
|
"learning_rate": 0.00018893834725801782, |
|
"loss": 0.0558, |
|
"step": 743 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.0001889087158884565, |
|
"loss": 0.0016, |
|
"step": 744 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00018887904721444953, |
|
"loss": 0.0328, |
|
"step": 745 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00018884934124844532, |
|
"loss": 0.0359, |
|
"step": 746 |
|
}, |
|
{ |
|
"epoch": 1.84, |
|
"learning_rate": 0.00018881959800290797, |
|
"loss": 0.0856, |
|
"step": 747 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00018878981749031716, |
|
"loss": 0.0639, |
|
"step": 748 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00018875999972316825, |
|
"loss": 0.0152, |
|
"step": 749 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.00018873014471397224, |
|
"loss": 0.0043, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 1.85, |
|
"learning_rate": 0.0001887002524752557, |
|
"loss": 0.0024, |
|
"step": 751 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00018867032301956088, |
|
"loss": 0.0007, |
|
"step": 752 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00018864035635944562, |
|
"loss": 0.0005, |
|
"step": 753 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00018861035250748343, |
|
"loss": 0.0556, |
|
"step": 754 |
|
}, |
|
{ |
|
"epoch": 1.86, |
|
"learning_rate": 0.00018858031147626325, |
|
"loss": 0.0383, |
|
"step": 755 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00018855023327838983, |
|
"loss": 0.0003, |
|
"step": 756 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.0001885201179264834, |
|
"loss": 0.0006, |
|
"step": 757 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00018848996543317982, |
|
"loss": 0.0033, |
|
"step": 758 |
|
}, |
|
{ |
|
"epoch": 1.87, |
|
"learning_rate": 0.00018845977581113046, |
|
"loss": 0.0003, |
|
"step": 759 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00018842954907300236, |
|
"loss": 0.0507, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00018839928523147812, |
|
"loss": 0.181, |
|
"step": 761 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.00018836898429925585, |
|
"loss": 0.0007, |
|
"step": 762 |
|
}, |
|
{ |
|
"epoch": 1.88, |
|
"learning_rate": 0.0001883386462890493, |
|
"loss": 0.0371, |
|
"step": 763 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001883082712135877, |
|
"loss": 0.0005, |
|
"step": 764 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00018827785908561584, |
|
"loss": 0.0009, |
|
"step": 765 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.00018824740991789415, |
|
"loss": 0.0009, |
|
"step": 766 |
|
}, |
|
{ |
|
"epoch": 1.89, |
|
"learning_rate": 0.0001882169237231985, |
|
"loss": 0.0014, |
|
"step": 767 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00018818640051432035, |
|
"loss": 0.0451, |
|
"step": 768 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.00018815584030406664, |
|
"loss": 0.0006, |
|
"step": 769 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001881252431052599, |
|
"loss": 0.0046, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 1.9, |
|
"learning_rate": 0.0001880946089307381, |
|
"loss": 0.0058, |
|
"step": 771 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00018806393779335483, |
|
"loss": 0.0002, |
|
"step": 772 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00018803322970597908, |
|
"loss": 0.0012, |
|
"step": 773 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00018800248468149543, |
|
"loss": 0.1054, |
|
"step": 774 |
|
}, |
|
{ |
|
"epoch": 1.91, |
|
"learning_rate": 0.00018797170273280388, |
|
"loss": 0.0132, |
|
"step": 775 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00018794088387282, |
|
"loss": 0.0526, |
|
"step": 776 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00018791002811447481, |
|
"loss": 0.0004, |
|
"step": 777 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00018787913547071484, |
|
"loss": 0.0421, |
|
"step": 778 |
|
}, |
|
{ |
|
"epoch": 1.92, |
|
"learning_rate": 0.00018784820595450197, |
|
"loss": 0.0585, |
|
"step": 779 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00018781723957881372, |
|
"loss": 0.0561, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00018778623635664303, |
|
"loss": 0.0618, |
|
"step": 781 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.0001877551963009982, |
|
"loss": 0.0473, |
|
"step": 782 |
|
}, |
|
{ |
|
"epoch": 1.93, |
|
"learning_rate": 0.00018772411942490313, |
|
"loss": 0.0004, |
|
"step": 783 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001876930057413971, |
|
"loss": 0.001, |
|
"step": 784 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.0001876618552635348, |
|
"loss": 0.0439, |
|
"step": 785 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00018763066800438636, |
|
"loss": 0.068, |
|
"step": 786 |
|
}, |
|
{ |
|
"epoch": 1.94, |
|
"learning_rate": 0.00018759944397703747, |
|
"loss": 0.035, |
|
"step": 787 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00018756818319458907, |
|
"loss": 0.0005, |
|
"step": 788 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.0001875368856701576, |
|
"loss": 0.0472, |
|
"step": 789 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.000187505551416875, |
|
"loss": 0.0495, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 1.95, |
|
"learning_rate": 0.00018747418044788846, |
|
"loss": 0.0657, |
|
"step": 791 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0001874427727763607, |
|
"loss": 0.0005, |
|
"step": 792 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0001874113284154698, |
|
"loss": 0.076, |
|
"step": 793 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.0001873798473784092, |
|
"loss": 0.0581, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 1.96, |
|
"learning_rate": 0.00018734832967838775, |
|
"loss": 0.0007, |
|
"step": 795 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00018731677532862976, |
|
"loss": 0.0363, |
|
"step": 796 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00018728518434237473, |
|
"loss": 0.0651, |
|
"step": 797 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.00018725355673287778, |
|
"loss": 0.0649, |
|
"step": 798 |
|
}, |
|
{ |
|
"epoch": 1.97, |
|
"learning_rate": 0.0001872218925134092, |
|
"loss": 0.0004, |
|
"step": 799 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00018719019169725472, |
|
"loss": 0.0002, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"eval_loss": 0.014746900647878647, |
|
"eval_runtime": 126.097, |
|
"eval_samples_per_second": 1.015, |
|
"eval_steps_per_second": 0.341, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001871584542977154, |
|
"loss": 0.0467, |
|
"step": 801 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.00018712668032810768, |
|
"loss": 0.0519, |
|
"step": 802 |
|
}, |
|
{ |
|
"epoch": 1.98, |
|
"learning_rate": 0.0001870948698017633, |
|
"loss": 0.0105, |
|
"step": 803 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00018706302273202943, |
|
"loss": 0.0003, |
|
"step": 804 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00018703113913226847, |
|
"loss": 0.0297, |
|
"step": 805 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.00018699921901585813, |
|
"loss": 0.1403, |
|
"step": 806 |
|
}, |
|
{ |
|
"epoch": 1.99, |
|
"learning_rate": 0.0001869672623961916, |
|
"loss": 0.0525, |
|
"step": 807 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.0001869352692866772, |
|
"loss": 0.0691, |
|
"step": 808 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00018690323970073873, |
|
"loss": 0.0535, |
|
"step": 809 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00018687117365181512, |
|
"loss": 0.0336, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00018683907115336074, |
|
"loss": 0.0262, |
|
"step": 811 |
|
}, |
|
{ |
|
"epoch": 2.0, |
|
"learning_rate": 0.00018680693221884517, |
|
"loss": 0.0004, |
|
"step": 812 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00018677475686175338, |
|
"loss": 0.0003, |
|
"step": 813 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00018674254509558544, |
|
"loss": 0.0402, |
|
"step": 814 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.0001867102969338569, |
|
"loss": 0.0004, |
|
"step": 815 |
|
}, |
|
{ |
|
"epoch": 2.01, |
|
"learning_rate": 0.00018667801239009846, |
|
"loss": 0.0005, |
|
"step": 816 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00018664569147785613, |
|
"loss": 0.005, |
|
"step": 817 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00018661333421069113, |
|
"loss": 0.0008, |
|
"step": 818 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00018658094060217999, |
|
"loss": 0.0608, |
|
"step": 819 |
|
}, |
|
{ |
|
"epoch": 2.02, |
|
"learning_rate": 0.00018654851066591448, |
|
"loss": 0.031, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00018651604441550154, |
|
"loss": 0.0024, |
|
"step": 821 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00018648354186456348, |
|
"loss": 0.0726, |
|
"step": 822 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.00018645100302673774, |
|
"loss": 0.0463, |
|
"step": 823 |
|
}, |
|
{ |
|
"epoch": 2.03, |
|
"learning_rate": 0.000186418427915677, |
|
"loss": 0.0032, |
|
"step": 824 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.0001863858165450492, |
|
"loss": 0.0004, |
|
"step": 825 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00018635316892853741, |
|
"loss": 0.0196, |
|
"step": 826 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00018632048507984, |
|
"loss": 0.022, |
|
"step": 827 |
|
}, |
|
{ |
|
"epoch": 2.04, |
|
"learning_rate": 0.00018628776501267052, |
|
"loss": 0.0009, |
|
"step": 828 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001862550087407577, |
|
"loss": 0.1024, |
|
"step": 829 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001862222162778454, |
|
"loss": 0.0796, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.00018618938763769282, |
|
"loss": 0.0004, |
|
"step": 831 |
|
}, |
|
{ |
|
"epoch": 2.05, |
|
"learning_rate": 0.0001861565228340742, |
|
"loss": 0.0004, |
|
"step": 832 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00018612362188077898, |
|
"loss": 0.1187, |
|
"step": 833 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00018609068479161182, |
|
"loss": 0.002, |
|
"step": 834 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00018605771158039253, |
|
"loss": 0.0931, |
|
"step": 835 |
|
}, |
|
{ |
|
"epoch": 2.06, |
|
"learning_rate": 0.00018602470226095603, |
|
"loss": 0.1085, |
|
"step": 836 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0001859916568471524, |
|
"loss": 0.0022, |
|
"step": 837 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00018595857535284692, |
|
"loss": 0.0291, |
|
"step": 838 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.0001859254577919199, |
|
"loss": 0.032, |
|
"step": 839 |
|
}, |
|
{ |
|
"epoch": 2.07, |
|
"learning_rate": 0.00018589230417826697, |
|
"loss": 0.0046, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0001858591145257987, |
|
"loss": 0.0018, |
|
"step": 841 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00018582588884844084, |
|
"loss": 0.1096, |
|
"step": 842 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.0001857926271601343, |
|
"loss": 0.0547, |
|
"step": 843 |
|
}, |
|
{ |
|
"epoch": 2.08, |
|
"learning_rate": 0.00018575932947483502, |
|
"loss": 0.0151, |
|
"step": 844 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00018572599580651415, |
|
"loss": 0.071, |
|
"step": 845 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00018569262616915784, |
|
"loss": 0.0062, |
|
"step": 846 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00018565922057676737, |
|
"loss": 0.0039, |
|
"step": 847 |
|
}, |
|
{ |
|
"epoch": 2.09, |
|
"learning_rate": 0.00018562577904335912, |
|
"loss": 0.0017, |
|
"step": 848 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00018559230158296454, |
|
"loss": 0.0013, |
|
"step": 849 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00018555878820963013, |
|
"loss": 0.0011, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00018552523893741748, |
|
"loss": 0.0013, |
|
"step": 851 |
|
}, |
|
{ |
|
"epoch": 2.1, |
|
"learning_rate": 0.00018549165378040327, |
|
"loss": 0.005, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.0001854580327526792, |
|
"loss": 0.0379, |
|
"step": 853 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00018542437586835202, |
|
"loss": 0.0847, |
|
"step": 854 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00018539068314154354, |
|
"loss": 0.0017, |
|
"step": 855 |
|
}, |
|
{ |
|
"epoch": 2.11, |
|
"learning_rate": 0.00018535695458639056, |
|
"loss": 0.0721, |
|
"step": 856 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.000185323190217045, |
|
"loss": 0.0006, |
|
"step": 857 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00018528939004767376, |
|
"loss": 0.0003, |
|
"step": 858 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00018525555409245877, |
|
"loss": 0.0005, |
|
"step": 859 |
|
}, |
|
{ |
|
"epoch": 2.12, |
|
"learning_rate": 0.00018522168236559695, |
|
"loss": 0.0008, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00018518777488130023, |
|
"loss": 0.0466, |
|
"step": 861 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001851538316537956, |
|
"loss": 0.0372, |
|
"step": 862 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.00018511985269732497, |
|
"loss": 0.038, |
|
"step": 863 |
|
}, |
|
{ |
|
"epoch": 2.13, |
|
"learning_rate": 0.0001850858380261453, |
|
"loss": 0.0013, |
|
"step": 864 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00018505178765452853, |
|
"loss": 0.0304, |
|
"step": 865 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00018501770159676156, |
|
"loss": 0.0465, |
|
"step": 866 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.00018498357986714622, |
|
"loss": 0.0424, |
|
"step": 867 |
|
}, |
|
{ |
|
"epoch": 2.14, |
|
"learning_rate": 0.0001849494224799994, |
|
"loss": 0.0705, |
|
"step": 868 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0001849152294496529, |
|
"loss": 0.0002, |
|
"step": 869 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00018488100079045344, |
|
"loss": 0.0009, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.00018484673651676282, |
|
"loss": 0.0002, |
|
"step": 871 |
|
}, |
|
{ |
|
"epoch": 2.15, |
|
"learning_rate": 0.0001848124366429576, |
|
"loss": 0.0455, |
|
"step": 872 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001847781011834294, |
|
"loss": 0.0505, |
|
"step": 873 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00018474373015258473, |
|
"loss": 0.0386, |
|
"step": 874 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.00018470932356484508, |
|
"loss": 0.0509, |
|
"step": 875 |
|
}, |
|
{ |
|
"epoch": 2.16, |
|
"learning_rate": 0.0001846748814346468, |
|
"loss": 0.0003, |
|
"step": 876 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0001846404037764411, |
|
"loss": 0.0351, |
|
"step": 877 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00018460589060469425, |
|
"loss": 0.0003, |
|
"step": 878 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.0001845713419338873, |
|
"loss": 0.0002, |
|
"step": 879 |
|
}, |
|
{ |
|
"epoch": 2.17, |
|
"learning_rate": 0.00018453675777851627, |
|
"loss": 0.0037, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00018450213815309198, |
|
"loss": 0.0442, |
|
"step": 881 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00018446748307214019, |
|
"loss": 0.035, |
|
"step": 882 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.00018443279255020152, |
|
"loss": 0.0875, |
|
"step": 883 |
|
}, |
|
{ |
|
"epoch": 2.18, |
|
"learning_rate": 0.0001843980666018315, |
|
"loss": 0.0418, |
|
"step": 884 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018436330524160047, |
|
"loss": 0.0009, |
|
"step": 885 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018432850848409363, |
|
"loss": 0.0089, |
|
"step": 886 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018429367634391114, |
|
"loss": 0.0017, |
|
"step": 887 |
|
}, |
|
{ |
|
"epoch": 2.19, |
|
"learning_rate": 0.00018425880883566782, |
|
"loss": 0.0004, |
|
"step": 888 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018422390597399349, |
|
"loss": 0.0464, |
|
"step": 889 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.0001841889677735327, |
|
"loss": 0.0027, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018415399424894492, |
|
"loss": 0.0004, |
|
"step": 891 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018411898541490434, |
|
"loss": 0.0928, |
|
"step": 892 |
|
}, |
|
{ |
|
"epoch": 2.2, |
|
"learning_rate": 0.00018408394128610001, |
|
"loss": 0.0042, |
|
"step": 893 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.0001840488618772359, |
|
"loss": 0.002, |
|
"step": 894 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00018401374720303056, |
|
"loss": 0.0002, |
|
"step": 895 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00018397859727821748, |
|
"loss": 0.0004, |
|
"step": 896 |
|
}, |
|
{ |
|
"epoch": 2.21, |
|
"learning_rate": 0.00018394341211754495, |
|
"loss": 0.0987, |
|
"step": 897 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00018390819173577598, |
|
"loss": 0.0004, |
|
"step": 898 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.0001838729361476884, |
|
"loss": 0.0002, |
|
"step": 899 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00018383764536807485, |
|
"loss": 0.0002, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 2.22, |
|
"learning_rate": 0.00018380231941174258, |
|
"loss": 0.0422, |
|
"step": 901 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00018376695829351377, |
|
"loss": 0.0004, |
|
"step": 902 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001837315620282253, |
|
"loss": 0.0007, |
|
"step": 903 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.00018369613063072874, |
|
"loss": 0.0231, |
|
"step": 904 |
|
}, |
|
{ |
|
"epoch": 2.23, |
|
"learning_rate": 0.0001836606641158905, |
|
"loss": 0.053, |
|
"step": 905 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018362516249859163, |
|
"loss": 0.0003, |
|
"step": 906 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018358962579372796, |
|
"loss": 0.0403, |
|
"step": 907 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.00018355405401621001, |
|
"loss": 0.0319, |
|
"step": 908 |
|
}, |
|
{ |
|
"epoch": 2.24, |
|
"learning_rate": 0.0001835184471809631, |
|
"loss": 0.0402, |
|
"step": 909 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00018348280530292713, |
|
"loss": 0.0628, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.0001834471283970568, |
|
"loss": 0.0937, |
|
"step": 911 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00018341141647832147, |
|
"loss": 0.0005, |
|
"step": 912 |
|
}, |
|
{ |
|
"epoch": 2.25, |
|
"learning_rate": 0.00018337566956170523, |
|
"loss": 0.05, |
|
"step": 913 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00018333988766220676, |
|
"loss": 0.0202, |
|
"step": 914 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00018330407079483952, |
|
"loss": 0.0008, |
|
"step": 915 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.0001832682189746316, |
|
"loss": 0.0003, |
|
"step": 916 |
|
}, |
|
{ |
|
"epoch": 2.26, |
|
"learning_rate": 0.00018323233221662573, |
|
"loss": 0.0037, |
|
"step": 917 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.00018319641053587938, |
|
"loss": 0.0417, |
|
"step": 918 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001831604539474646, |
|
"loss": 0.0269, |
|
"step": 919 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001831244624664681, |
|
"loss": 0.048, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 2.27, |
|
"learning_rate": 0.0001830884361079912, |
|
"loss": 0.0801, |
|
"step": 921 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018305237488714995, |
|
"loss": 0.0001, |
|
"step": 922 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018301627881907494, |
|
"loss": 0.0425, |
|
"step": 923 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018298014791891137, |
|
"loss": 0.0323, |
|
"step": 924 |
|
}, |
|
{ |
|
"epoch": 2.28, |
|
"learning_rate": 0.00018294398220181917, |
|
"loss": 0.0631, |
|
"step": 925 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00018290778168297277, |
|
"loss": 0.0005, |
|
"step": 926 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00018287154637756125, |
|
"loss": 0.0007, |
|
"step": 927 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00018283527630078825, |
|
"loss": 0.0237, |
|
"step": 928 |
|
}, |
|
{ |
|
"epoch": 2.29, |
|
"learning_rate": 0.00018279897146787204, |
|
"loss": 0.0605, |
|
"step": 929 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.0001827626318940454, |
|
"loss": 0.0871, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00018272625759455582, |
|
"loss": 0.003, |
|
"step": 931 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00018268984858466522, |
|
"loss": 0.0001, |
|
"step": 932 |
|
}, |
|
{ |
|
"epoch": 2.3, |
|
"learning_rate": 0.00018265340487965017, |
|
"loss": 0.0586, |
|
"step": 933 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00018261692649480175, |
|
"loss": 0.0459, |
|
"step": 934 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00018258041344542566, |
|
"loss": 0.0005, |
|
"step": 935 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00018254386574684204, |
|
"loss": 0.027, |
|
"step": 936 |
|
}, |
|
{ |
|
"epoch": 2.31, |
|
"learning_rate": 0.00018250728341438568, |
|
"loss": 0.0001, |
|
"step": 937 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001824706664634058, |
|
"loss": 0.0824, |
|
"step": 938 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001824340149092662, |
|
"loss": 0.0112, |
|
"step": 939 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.00018239732876734527, |
|
"loss": 0.0007, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 2.32, |
|
"learning_rate": 0.0001823606080530357, |
|
"loss": 0.0004, |
|
"step": 941 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001823238527817449, |
|
"loss": 0.1245, |
|
"step": 942 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.0001822870629688947, |
|
"loss": 0.0796, |
|
"step": 943 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00018225023862992142, |
|
"loss": 0.0712, |
|
"step": 944 |
|
}, |
|
{ |
|
"epoch": 2.33, |
|
"learning_rate": 0.00018221337978027583, |
|
"loss": 0.033, |
|
"step": 945 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00018217648643542323, |
|
"loss": 0.0025, |
|
"step": 946 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.00018213955861084343, |
|
"loss": 0.0002, |
|
"step": 947 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001821025963220306, |
|
"loss": 0.0015, |
|
"step": 948 |
|
}, |
|
{ |
|
"epoch": 2.34, |
|
"learning_rate": 0.0001820655995844935, |
|
"loss": 0.0024, |
|
"step": 949 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00018202856841375518, |
|
"loss": 0.0435, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00018199150282535332, |
|
"loss": 0.0511, |
|
"step": 951 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.00018195440283483988, |
|
"loss": 0.0627, |
|
"step": 952 |
|
}, |
|
{ |
|
"epoch": 2.35, |
|
"learning_rate": 0.0001819172684577814, |
|
"loss": 0.0885, |
|
"step": 953 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.0001818800997097587, |
|
"loss": 0.0628, |
|
"step": 954 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00018184289660636715, |
|
"loss": 0.0008, |
|
"step": 955 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00018180565916321647, |
|
"loss": 0.0005, |
|
"step": 956 |
|
}, |
|
{ |
|
"epoch": 2.36, |
|
"learning_rate": 0.00018176838739593078, |
|
"loss": 0.0304, |
|
"step": 957 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0001817310813201486, |
|
"loss": 0.0297, |
|
"step": 958 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00018169374095152295, |
|
"loss": 0.001, |
|
"step": 959 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.0001816563663057211, |
|
"loss": 0.0022, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 2.37, |
|
"learning_rate": 0.00018161895739842476, |
|
"loss": 0.0411, |
|
"step": 961 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00018158151424533002, |
|
"loss": 0.0012, |
|
"step": 962 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.0001815440368621473, |
|
"loss": 0.0343, |
|
"step": 963 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00018150652526460146, |
|
"loss": 0.0418, |
|
"step": 964 |
|
}, |
|
{ |
|
"epoch": 2.38, |
|
"learning_rate": 0.00018146897946843163, |
|
"loss": 0.1491, |
|
"step": 965 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018143139948939137, |
|
"loss": 0.0002, |
|
"step": 966 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018139378534324848, |
|
"loss": 0.0006, |
|
"step": 967 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018135613704578526, |
|
"loss": 0.0004, |
|
"step": 968 |
|
}, |
|
{ |
|
"epoch": 2.39, |
|
"learning_rate": 0.00018131845461279812, |
|
"loss": 0.0704, |
|
"step": 969 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.000181280738060098, |
|
"loss": 0.0002, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00018124298740351003, |
|
"loss": 0.0001, |
|
"step": 971 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00018120520265887363, |
|
"loss": 0.0025, |
|
"step": 972 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00018116738384204266, |
|
"loss": 0.0254, |
|
"step": 973 |
|
}, |
|
{ |
|
"epoch": 2.4, |
|
"learning_rate": 0.00018112953096888516, |
|
"loss": 0.0003, |
|
"step": 974 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001810916440552835, |
|
"loss": 0.1252, |
|
"step": 975 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.00018105372311713432, |
|
"loss": 0.0005, |
|
"step": 976 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001810157681703485, |
|
"loss": 0.0002, |
|
"step": 977 |
|
}, |
|
{ |
|
"epoch": 2.41, |
|
"learning_rate": 0.0001809777792308513, |
|
"loss": 0.024, |
|
"step": 978 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00018093975631458217, |
|
"loss": 0.0001, |
|
"step": 979 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00018090169943749476, |
|
"loss": 0.0566, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.00018086360861555706, |
|
"loss": 0.0003, |
|
"step": 981 |
|
}, |
|
{ |
|
"epoch": 2.42, |
|
"learning_rate": 0.0001808254838647513, |
|
"loss": 0.0443, |
|
"step": 982 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018078732520107385, |
|
"loss": 0.0001, |
|
"step": 983 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018074913264053545, |
|
"loss": 0.0002, |
|
"step": 984 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018071090619916093, |
|
"loss": 0.0553, |
|
"step": 985 |
|
}, |
|
{ |
|
"epoch": 2.43, |
|
"learning_rate": 0.00018067264589298945, |
|
"loss": 0.0981, |
|
"step": 986 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.0001806343517380743, |
|
"loss": 0.001, |
|
"step": 987 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00018059602375048293, |
|
"loss": 0.0004, |
|
"step": 988 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00018055766194629715, |
|
"loss": 0.0001, |
|
"step": 989 |
|
}, |
|
{ |
|
"epoch": 2.44, |
|
"learning_rate": 0.00018051926634161282, |
|
"loss": 0.0004, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00018048083695254005, |
|
"loss": 0.0494, |
|
"step": 991 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00018044237379520305, |
|
"loss": 0.089, |
|
"step": 992 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.00018040387688574025, |
|
"loss": 0.0001, |
|
"step": 993 |
|
}, |
|
{ |
|
"epoch": 2.45, |
|
"learning_rate": 0.0001803653462403043, |
|
"loss": 0.056, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00018032678187506187, |
|
"loss": 0.0343, |
|
"step": 995 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.0001802881838061939, |
|
"loss": 0.041, |
|
"step": 996 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00018024955204989538, |
|
"loss": 0.0631, |
|
"step": 997 |
|
}, |
|
{ |
|
"epoch": 2.46, |
|
"learning_rate": 0.00018021088662237552, |
|
"loss": 0.0001, |
|
"step": 998 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0001801721875398576, |
|
"loss": 0.0002, |
|
"step": 999 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00018013345481857903, |
|
"loss": 0.0001, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"eval_loss": 0.016305094584822655, |
|
"eval_runtime": 126.0835, |
|
"eval_samples_per_second": 1.015, |
|
"eval_steps_per_second": 0.341, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.0001800946884747913, |
|
"loss": 0.0365, |
|
"step": 1001 |
|
}, |
|
{ |
|
"epoch": 2.47, |
|
"learning_rate": 0.00018005588852476015, |
|
"loss": 0.0001, |
|
"step": 1002 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00018001705498476523, |
|
"loss": 0.0412, |
|
"step": 1003 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.00017997818787110042, |
|
"loss": 0.0001, |
|
"step": 1004 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0001799392872000736, |
|
"loss": 0.0001, |
|
"step": 1005 |
|
}, |
|
{ |
|
"epoch": 2.48, |
|
"learning_rate": 0.0001799003529880068, |
|
"loss": 0.0, |
|
"step": 1006 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00017986138525123607, |
|
"loss": 0.0739, |
|
"step": 1007 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0001798223840061116, |
|
"loss": 0.0001, |
|
"step": 1008 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.0001797833492689975, |
|
"loss": 0.0863, |
|
"step": 1009 |
|
}, |
|
{ |
|
"epoch": 2.49, |
|
"learning_rate": 0.00017974428105627208, |
|
"loss": 0.0002, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00017970517938432765, |
|
"loss": 0.0538, |
|
"step": 1011 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00017966604426957047, |
|
"loss": 0.0001, |
|
"step": 1012 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00017962687572842102, |
|
"loss": 0.034, |
|
"step": 1013 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 0.00017958767377731358, |
|
"loss": 0.0001, |
|
"step": 1014 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00017954843843269664, |
|
"loss": 0.0001, |
|
"step": 1015 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00017950916971103259, |
|
"loss": 0.0001, |
|
"step": 1016 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00017946986762879785, |
|
"loss": 0.0002, |
|
"step": 1017 |
|
}, |
|
{ |
|
"epoch": 2.51, |
|
"learning_rate": 0.00017943053220248283, |
|
"loss": 0.0489, |
|
"step": 1018 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.000179391163448592, |
|
"loss": 0.0002, |
|
"step": 1019 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.0001793517613836437, |
|
"loss": 0.0003, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00017931232602417033, |
|
"loss": 0.0003, |
|
"step": 1021 |
|
}, |
|
{ |
|
"epoch": 2.52, |
|
"learning_rate": 0.00017927285738671825, |
|
"loss": 0.0641, |
|
"step": 1022 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00017923335548784773, |
|
"loss": 0.0714, |
|
"step": 1023 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00017919382034413305, |
|
"loss": 0.0004, |
|
"step": 1024 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00017915425197216245, |
|
"loss": 0.0272, |
|
"step": 1025 |
|
}, |
|
{ |
|
"epoch": 2.53, |
|
"learning_rate": 0.00017911465038853805, |
|
"loss": 0.0003, |
|
"step": 1026 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00017907501560987594, |
|
"loss": 0.0003, |
|
"step": 1027 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00017903534765280614, |
|
"loss": 0.0001, |
|
"step": 1028 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00017899564653397262, |
|
"loss": 0.0357, |
|
"step": 1029 |
|
}, |
|
{ |
|
"epoch": 2.54, |
|
"learning_rate": 0.00017895591227003315, |
|
"loss": 0.0015, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00017891614487765959, |
|
"loss": 0.0957, |
|
"step": 1031 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00017887634437353754, |
|
"loss": 0.0, |
|
"step": 1032 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00017883651077436655, |
|
"loss": 0.0377, |
|
"step": 1033 |
|
}, |
|
{ |
|
"epoch": 2.55, |
|
"learning_rate": 0.00017879664409686008, |
|
"loss": 0.0415, |
|
"step": 1034 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00017875674435774547, |
|
"loss": 0.0002, |
|
"step": 1035 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00017871681157376383, |
|
"loss": 0.0489, |
|
"step": 1036 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.0001786768457616703, |
|
"loss": 0.0478, |
|
"step": 1037 |
|
}, |
|
{ |
|
"epoch": 2.56, |
|
"learning_rate": 0.00017863684693823374, |
|
"loss": 0.0675, |
|
"step": 1038 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00017859681512023693, |
|
"loss": 0.0002, |
|
"step": 1039 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00017855675032447648, |
|
"loss": 0.06, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00017851665256776283, |
|
"loss": 0.1278, |
|
"step": 1041 |
|
}, |
|
{ |
|
"epoch": 2.57, |
|
"learning_rate": 0.00017847652186692026, |
|
"loss": 0.0002, |
|
"step": 1042 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00017843635823878686, |
|
"loss": 0.0, |
|
"step": 1043 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00017839616170021452, |
|
"loss": 0.0441, |
|
"step": 1044 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00017835593226806903, |
|
"loss": 0.0, |
|
"step": 1045 |
|
}, |
|
{ |
|
"epoch": 2.58, |
|
"learning_rate": 0.00017831566995922985, |
|
"loss": 0.0446, |
|
"step": 1046 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00017827537479059026, |
|
"loss": 0.0006, |
|
"step": 1047 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.0001782350467790575, |
|
"loss": 0.0002, |
|
"step": 1048 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00017819468594155232, |
|
"loss": 0.0002, |
|
"step": 1049 |
|
}, |
|
{ |
|
"epoch": 2.59, |
|
"learning_rate": 0.00017815429229500946, |
|
"loss": 0.0001, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00017811386585637727, |
|
"loss": 0.0392, |
|
"step": 1051 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00017807340664261802, |
|
"loss": 0.0756, |
|
"step": 1052 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.0001780329146707076, |
|
"loss": 0.0001, |
|
"step": 1053 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00017799238995763568, |
|
"loss": 0.0509, |
|
"step": 1054 |
|
}, |
|
{ |
|
"epoch": 2.6, |
|
"learning_rate": 0.00017795183252040567, |
|
"loss": 0.0, |
|
"step": 1055 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00017791124237603477, |
|
"loss": 0.1026, |
|
"step": 1056 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00017787061954155378, |
|
"loss": 0.0004, |
|
"step": 1057 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00017782996403400736, |
|
"loss": 0.0395, |
|
"step": 1058 |
|
}, |
|
{ |
|
"epoch": 2.61, |
|
"learning_rate": 0.00017778927587045373, |
|
"loss": 0.029, |
|
"step": 1059 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00017774855506796496, |
|
"loss": 0.0002, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00017770780164362665, |
|
"loss": 0.0211, |
|
"step": 1061 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.0001776670156145383, |
|
"loss": 0.0596, |
|
"step": 1062 |
|
}, |
|
{ |
|
"epoch": 2.62, |
|
"learning_rate": 0.00017762619699781287, |
|
"loss": 0.0001, |
|
"step": 1063 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00017758534581057718, |
|
"loss": 0.0499, |
|
"step": 1064 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.0001775444620699715, |
|
"loss": 0.043, |
|
"step": 1065 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00017750354579315004, |
|
"loss": 0.0527, |
|
"step": 1066 |
|
}, |
|
{ |
|
"epoch": 2.63, |
|
"learning_rate": 0.00017746259699728042, |
|
"loss": 0.0855, |
|
"step": 1067 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.00017742161569954398, |
|
"loss": 0.0002, |
|
"step": 1068 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001773806019171358, |
|
"loss": 0.076, |
|
"step": 1069 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001773395556672644, |
|
"loss": 0.03, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 2.64, |
|
"learning_rate": 0.0001772984769671521, |
|
"loss": 0.0593, |
|
"step": 1071 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0001772573658340347, |
|
"loss": 0.0493, |
|
"step": 1072 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0001772162222851617, |
|
"loss": 0.0007, |
|
"step": 1073 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.0001771750463377962, |
|
"loss": 0.0033, |
|
"step": 1074 |
|
}, |
|
{ |
|
"epoch": 2.65, |
|
"learning_rate": 0.00017713383800921478, |
|
"loss": 0.0019, |
|
"step": 1075 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00017709259731670774, |
|
"loss": 0.0031, |
|
"step": 1076 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00017705132427757895, |
|
"loss": 0.0028, |
|
"step": 1077 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00017701001890914572, |
|
"loss": 0.0003, |
|
"step": 1078 |
|
}, |
|
{ |
|
"epoch": 2.66, |
|
"learning_rate": 0.00017696868122873909, |
|
"loss": 0.0786, |
|
"step": 1079 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.00017692731125370354, |
|
"loss": 0.0496, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.00017688590900139715, |
|
"loss": 0.1294, |
|
"step": 1081 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.00017684447448919154, |
|
"loss": 0.0002, |
|
"step": 1082 |
|
}, |
|
{ |
|
"epoch": 2.67, |
|
"learning_rate": 0.0001768030077344719, |
|
"loss": 0.0002, |
|
"step": 1083 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00017676150875463686, |
|
"loss": 0.0001, |
|
"step": 1084 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00017671997756709863, |
|
"loss": 0.0803, |
|
"step": 1085 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.0001766784141892829, |
|
"loss": 0.0232, |
|
"step": 1086 |
|
}, |
|
{ |
|
"epoch": 2.68, |
|
"learning_rate": 0.00017663681863862895, |
|
"loss": 0.0001, |
|
"step": 1087 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.0001765951909325895, |
|
"loss": 0.0522, |
|
"step": 1088 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00017655353108863068, |
|
"loss": 0.0008, |
|
"step": 1089 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00017651183912423228, |
|
"loss": 0.0002, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 2.69, |
|
"learning_rate": 0.00017647011505688743, |
|
"loss": 0.0001, |
|
"step": 1091 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.0001764283589041028, |
|
"loss": 0.0002, |
|
"step": 1092 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00017638657068339843, |
|
"loss": 0.0428, |
|
"step": 1093 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00017634475041230797, |
|
"loss": 0.0351, |
|
"step": 1094 |
|
}, |
|
{ |
|
"epoch": 2.7, |
|
"learning_rate": 0.00017630289810837834, |
|
"loss": 0.0647, |
|
"step": 1095 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00017626101378917004, |
|
"loss": 0.0181, |
|
"step": 1096 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00017621909747225697, |
|
"loss": 0.0277, |
|
"step": 1097 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.0001761771491752264, |
|
"loss": 0.0002, |
|
"step": 1098 |
|
}, |
|
{ |
|
"epoch": 2.71, |
|
"learning_rate": 0.00017613516891567906, |
|
"loss": 0.0001, |
|
"step": 1099 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0001760931567112291, |
|
"loss": 0.0006, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00017605111257950408, |
|
"loss": 0.0002, |
|
"step": 1101 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.0001760090365381449, |
|
"loss": 0.065, |
|
"step": 1102 |
|
}, |
|
{ |
|
"epoch": 2.72, |
|
"learning_rate": 0.00017596692860480593, |
|
"loss": 0.0582, |
|
"step": 1103 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.0001759247887971548, |
|
"loss": 0.0932, |
|
"step": 1104 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00017588261713287267, |
|
"loss": 0.0686, |
|
"step": 1105 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00017584041362965396, |
|
"loss": 0.0451, |
|
"step": 1106 |
|
}, |
|
{ |
|
"epoch": 2.73, |
|
"learning_rate": 0.00017579817830520644, |
|
"loss": 0.0481, |
|
"step": 1107 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0001757559111772513, |
|
"loss": 0.0387, |
|
"step": 1108 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00017571361226352306, |
|
"loss": 0.0003, |
|
"step": 1109 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.00017567128158176953, |
|
"loss": 0.0873, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 2.74, |
|
"learning_rate": 0.0001756289191497519, |
|
"loss": 0.0605, |
|
"step": 1111 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.0001755865249852446, |
|
"loss": 0.0674, |
|
"step": 1112 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00017554409910603552, |
|
"loss": 0.0238, |
|
"step": 1113 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00017550164152992573, |
|
"loss": 0.0009, |
|
"step": 1114 |
|
}, |
|
{ |
|
"epoch": 2.75, |
|
"learning_rate": 0.00017545915227472965, |
|
"loss": 0.074, |
|
"step": 1115 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00017541663135827492, |
|
"loss": 0.0561, |
|
"step": 1116 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.00017537407879840265, |
|
"loss": 0.0347, |
|
"step": 1117 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.000175331494612967, |
|
"loss": 0.0088, |
|
"step": 1118 |
|
}, |
|
{ |
|
"epoch": 2.76, |
|
"learning_rate": 0.0001752888788198355, |
|
"loss": 0.0704, |
|
"step": 1119 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00017524623143688902, |
|
"loss": 0.0011, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00017520355248202158, |
|
"loss": 0.1127, |
|
"step": 1121 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00017516084197314046, |
|
"loss": 0.0006, |
|
"step": 1122 |
|
}, |
|
{ |
|
"epoch": 2.77, |
|
"learning_rate": 0.00017511809992816618, |
|
"loss": 0.0438, |
|
"step": 1123 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00017507532636503256, |
|
"loss": 0.0614, |
|
"step": 1124 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00017503252130168657, |
|
"loss": 0.0639, |
|
"step": 1125 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00017498968475608838, |
|
"loss": 0.0354, |
|
"step": 1126 |
|
}, |
|
{ |
|
"epoch": 2.78, |
|
"learning_rate": 0.00017494681674621148, |
|
"loss": 0.027, |
|
"step": 1127 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00017490391729004244, |
|
"loss": 0.0001, |
|
"step": 1128 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00017486098640558107, |
|
"loss": 0.0002, |
|
"step": 1129 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.00017481802411084042, |
|
"loss": 0.0538, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 2.79, |
|
"learning_rate": 0.0001747750304238466, |
|
"loss": 0.0674, |
|
"step": 1131 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.00017473200536263905, |
|
"loss": 0.0974, |
|
"step": 1132 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0001746889489452702, |
|
"loss": 0.0006, |
|
"step": 1133 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0001746458611898058, |
|
"loss": 0.0003, |
|
"step": 1134 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.0001746027421143246, |
|
"loss": 0.0523, |
|
"step": 1135 |
|
}, |
|
{ |
|
"epoch": 2.8, |
|
"learning_rate": 0.00017455959173691863, |
|
"loss": 0.0002, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.00017451641007569296, |
|
"loss": 0.0055, |
|
"step": 1137 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.00017447319714876579, |
|
"loss": 0.0359, |
|
"step": 1138 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.00017442995297426846, |
|
"loss": 0.0001, |
|
"step": 1139 |
|
}, |
|
{ |
|
"epoch": 2.81, |
|
"learning_rate": 0.00017438667757034546, |
|
"loss": 0.0001, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00017434337095515432, |
|
"loss": 0.0604, |
|
"step": 1141 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00017430003314686569, |
|
"loss": 0.0363, |
|
"step": 1142 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00017425666416366332, |
|
"loss": 0.0013, |
|
"step": 1143 |
|
}, |
|
{ |
|
"epoch": 2.82, |
|
"learning_rate": 0.00017421326402374405, |
|
"loss": 0.0001, |
|
"step": 1144 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00017416983274531775, |
|
"loss": 0.1052, |
|
"step": 1145 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.00017412637034660734, |
|
"loss": 0.0003, |
|
"step": 1146 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0001740828768458489, |
|
"loss": 0.0323, |
|
"step": 1147 |
|
}, |
|
{ |
|
"epoch": 2.83, |
|
"learning_rate": 0.0001740393522612915, |
|
"loss": 0.0317, |
|
"step": 1148 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00017399579661119715, |
|
"loss": 0.0001, |
|
"step": 1149 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.0001739522099138411, |
|
"loss": 0.0003, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00017390859218751142, |
|
"loss": 0.0001, |
|
"step": 1151 |
|
}, |
|
{ |
|
"epoch": 2.84, |
|
"learning_rate": 0.00017386494345050942, |
|
"loss": 0.0008, |
|
"step": 1152 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.0001738212637211492, |
|
"loss": 0.0334, |
|
"step": 1153 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.000173777553017758, |
|
"loss": 0.0001, |
|
"step": 1154 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.00017373381135867604, |
|
"loss": 0.0549, |
|
"step": 1155 |
|
}, |
|
{ |
|
"epoch": 2.85, |
|
"learning_rate": 0.00017369003876225642, |
|
"loss": 0.0469, |
|
"step": 1156 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00017364623524686543, |
|
"loss": 0.0053, |
|
"step": 1157 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00017360240083088213, |
|
"loss": 0.0833, |
|
"step": 1158 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00017355853553269865, |
|
"loss": 0.0145, |
|
"step": 1159 |
|
}, |
|
{ |
|
"epoch": 2.86, |
|
"learning_rate": 0.00017351463937072004, |
|
"loss": 0.0002, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00017347071236336437, |
|
"loss": 0.042, |
|
"step": 1161 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00017342675452906248, |
|
"loss": 0.0001, |
|
"step": 1162 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00017338276588625839, |
|
"loss": 0.002, |
|
"step": 1163 |
|
}, |
|
{ |
|
"epoch": 2.87, |
|
"learning_rate": 0.00017333874645340884, |
|
"loss": 0.0003, |
|
"step": 1164 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0001732946962489836, |
|
"loss": 0.0416, |
|
"step": 1165 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0001732506152914653, |
|
"loss": 0.0002, |
|
"step": 1166 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.0001732065035993495, |
|
"loss": 0.0005, |
|
"step": 1167 |
|
}, |
|
{ |
|
"epoch": 2.88, |
|
"learning_rate": 0.00017316236119114463, |
|
"loss": 0.0531, |
|
"step": 1168 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00017311818808537206, |
|
"loss": 0.0492, |
|
"step": 1169 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00017307398430056593, |
|
"loss": 0.0001, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00017302974985527344, |
|
"loss": 0.0001, |
|
"step": 1171 |
|
}, |
|
{ |
|
"epoch": 2.89, |
|
"learning_rate": 0.00017298548476805446, |
|
"loss": 0.0628, |
|
"step": 1172 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00017294118905748182, |
|
"loss": 0.0001, |
|
"step": 1173 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00017289686274214118, |
|
"loss": 0.0185, |
|
"step": 1174 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.000172852505840631, |
|
"loss": 0.0002, |
|
"step": 1175 |
|
}, |
|
{ |
|
"epoch": 2.9, |
|
"learning_rate": 0.00017280811837156268, |
|
"loss": 0.0005, |
|
"step": 1176 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00017276370035356034, |
|
"loss": 0.0421, |
|
"step": 1177 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00017271925180526094, |
|
"loss": 0.0001, |
|
"step": 1178 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00017267477274531432, |
|
"loss": 0.0001, |
|
"step": 1179 |
|
}, |
|
{ |
|
"epoch": 2.91, |
|
"learning_rate": 0.00017263026319238301, |
|
"loss": 0.0001, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0001725857231651424, |
|
"loss": 0.0001, |
|
"step": 1181 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0001725411526822807, |
|
"loss": 0.0469, |
|
"step": 1182 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.00017249655176249882, |
|
"loss": 0.0001, |
|
"step": 1183 |
|
}, |
|
{ |
|
"epoch": 2.92, |
|
"learning_rate": 0.0001724519204245105, |
|
"loss": 0.0001, |
|
"step": 1184 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00017240725868704218, |
|
"loss": 0.0003, |
|
"step": 1185 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.0001723625665688331, |
|
"loss": 0.0418, |
|
"step": 1186 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00017231784408863532, |
|
"loss": 0.0004, |
|
"step": 1187 |
|
}, |
|
{ |
|
"epoch": 2.93, |
|
"learning_rate": 0.00017227309126521348, |
|
"loss": 0.0002, |
|
"step": 1188 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00017222830811734502, |
|
"loss": 0.0245, |
|
"step": 1189 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00017218349466382023, |
|
"loss": 0.0334, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00017213865092344187, |
|
"loss": 0.0004, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 2.94, |
|
"learning_rate": 0.00017209377691502565, |
|
"loss": 0.0001, |
|
"step": 1192 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00017204887265739977, |
|
"loss": 0.0459, |
|
"step": 1193 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0001720039381694053, |
|
"loss": 0.0371, |
|
"step": 1194 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.0001719589734698959, |
|
"loss": 0.0001, |
|
"step": 1195 |
|
}, |
|
{ |
|
"epoch": 2.95, |
|
"learning_rate": 0.00017191397857773788, |
|
"loss": 0.0014, |
|
"step": 1196 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00017186895351181032, |
|
"loss": 0.0002, |
|
"step": 1197 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00017182389829100485, |
|
"loss": 0.0001, |
|
"step": 1198 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00017177881293422583, |
|
"loss": 0.0413, |
|
"step": 1199 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"learning_rate": 0.00017173369746039025, |
|
"loss": 0.0001, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.96, |
|
"eval_loss": 0.014002716168761253, |
|
"eval_runtime": 126.0996, |
|
"eval_samples_per_second": 1.015, |
|
"eval_steps_per_second": 0.341, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00017168855188842773, |
|
"loss": 0.0004, |
|
"step": 1201 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00017164337623728045, |
|
"loss": 0.0012, |
|
"step": 1202 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.0001715981705259033, |
|
"loss": 0.0001, |
|
"step": 1203 |
|
}, |
|
{ |
|
"epoch": 2.97, |
|
"learning_rate": 0.00017155293477326384, |
|
"loss": 0.0009, |
|
"step": 1204 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00017150766899834204, |
|
"loss": 0.0307, |
|
"step": 1205 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00017146237322013068, |
|
"loss": 0.024, |
|
"step": 1206 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00017141704745763492, |
|
"loss": 0.0002, |
|
"step": 1207 |
|
}, |
|
{ |
|
"epoch": 2.98, |
|
"learning_rate": 0.00017137169172987268, |
|
"loss": 0.0417, |
|
"step": 1208 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00017132630605587435, |
|
"loss": 0.0655, |
|
"step": 1209 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00017128089045468294, |
|
"loss": 0.0002, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00017123544494535397, |
|
"loss": 0.0001, |
|
"step": 1211 |
|
}, |
|
{ |
|
"epoch": 2.99, |
|
"learning_rate": 0.00017118996954695553, |
|
"loss": 0.091, |
|
"step": 1212 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00017114446427856828, |
|
"loss": 0.0471, |
|
"step": 1213 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00017109892915928535, |
|
"loss": 0.0436, |
|
"step": 1214 |
|
}, |
|
{ |
|
"epoch": 3.0, |
|
"learning_rate": 0.00017105336420821247, |
|
"loss": 0.0, |
|
"step": 1215 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 4860, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 12, |
|
"save_steps": 500, |
|
"total_flos": 1.8178730075703214e+18, |
|
"train_batch_size": 3, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|