File size: 26,723 Bytes
0bfc910
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
Step 1 | loss:0.044024672359228134 lr:4.000000000000001e-06 tokens_per_second_per_gpu:2491.6973629520467 
Step 2 | loss:0.05892680212855339 lr:8.000000000000001e-06 tokens_per_second_per_gpu:3974.2111776128972 
Step 3 | loss:0.04225453361868858 lr:1.2e-05 tokens_per_second_per_gpu:3905.370467421552 
Step 4 | loss:0.17764173448085785 lr:1.6000000000000003e-05 tokens_per_second_per_gpu:1551.9643215537606 
Step 5 | loss:0.037487227469682693 lr:2e-05 tokens_per_second_per_gpu:7130.939572168644 
Step 6 | loss:0.02036396972835064 lr:2.4e-05 tokens_per_second_per_gpu:7011.3341117074515 
Step 7 | loss:0.05785895884037018 lr:2.8000000000000003e-05 tokens_per_second_per_gpu:6912.113823343527 
Step 8 | loss:0.07742859423160553 lr:3.2000000000000005e-05 tokens_per_second_per_gpu:6956.913493790545 
Step 9 | loss:0.03660118579864502 lr:3.6e-05 tokens_per_second_per_gpu:7061.035461267933 
Step 10 | loss:0.04339168220758438 lr:4e-05 tokens_per_second_per_gpu:3193.9909894637226 
Step 11 | loss:0.03603703901171684 lr:4.4000000000000006e-05 tokens_per_second_per_gpu:7237.857296584382 
Step 12 | loss:0.054984547197818756 lr:4.8e-05 tokens_per_second_per_gpu:6993.441059839511 
Step 13 | loss:0.013416779227554798 lr:5.2000000000000004e-05 tokens_per_second_per_gpu:7072.765016051136 
Step 14 | loss:0.005032726563513279 lr:5.6000000000000006e-05 tokens_per_second_per_gpu:7127.475165099142 
Step 15 | loss:0.01390102505683899 lr:6e-05 tokens_per_second_per_gpu:7109.448269370965 
Step 16 | loss:0.0136568583548069 lr:6.400000000000001e-05 tokens_per_second_per_gpu:1982.3982249786534 
Step 17 | loss:0.0008882262627594173 lr:6.800000000000001e-05 tokens_per_second_per_gpu:7250.524389681674 
Step 18 | loss:0.0032167285680770874 lr:7.2e-05 tokens_per_second_per_gpu:7213.540355055951 
Step 19 | loss:0.0025759534910321236 lr:7.6e-05 tokens_per_second_per_gpu:7121.0069379952865 
Step 20 | loss:0.005988647695630789 lr:8e-05 tokens_per_second_per_gpu:2547.6257517383906 
Step 21 | loss:0.0008513733628205955 lr:8.4e-05 tokens_per_second_per_gpu:7191.71777409903 
Step 22 | loss:0.002248356817290187 lr:8.800000000000001e-05 tokens_per_second_per_gpu:7035.1764218136495 
Step 23 | loss:0.0009723395924083889 lr:9.200000000000001e-05 tokens_per_second_per_gpu:7080.389898625971 
Step 24 | loss:0.000236137057072483 lr:9.6e-05 tokens_per_second_per_gpu:7020.606339376537 
Step 25 | loss:0.0003219555364921689 lr:0.0001 tokens_per_second_per_gpu:7184.303569325483 
Step 26 | loss:0.00020240044977981597 lr:9.999512620046522e-05 tokens_per_second_per_gpu:7203.458892320161 
Step 27 | loss:0.0001260352582903579 lr:9.998050575201771e-05 tokens_per_second_per_gpu:6809.96120938708 
Step 28 | loss:6.977388693485409e-05 lr:9.995614150494293e-05 tokens_per_second_per_gpu:7104.9650664705705 
Step 29 | loss:1.6838406736496836e-05 lr:9.992203820909906e-05 tokens_per_second_per_gpu:7140.525330720113 
Step 30 | loss:0.00025073738652281463 lr:9.987820251299122e-05 tokens_per_second_per_gpu:7200.059353063329 
Step 31 | loss:1.950736304934253e-06 lr:9.982464296247522e-05 tokens_per_second_per_gpu:2762.464688903786 
Step 32 | loss:4.355678811407415e-06 lr:9.976136999909156e-05 tokens_per_second_per_gpu:7077.602903719109 
Step 33 | loss:0.0002980708086397499 lr:9.968839595802982e-05 tokens_per_second_per_gpu:7205.392617148052 
Step 34 | loss:5.626531219604658e-06 lr:9.96057350657239e-05 tokens_per_second_per_gpu:7055.941632262539 
Step 35 | loss:0.0689399391412735 lr:9.951340343707852e-05 tokens_per_second_per_gpu:7005.982858059566 
Step 36 | loss:2.136021066689864e-06 lr:9.941141907232765e-05 tokens_per_second_per_gpu:7095.48050132196 
Step 37 | loss:0.0004781906900461763 lr:9.929980185352526e-05 tokens_per_second_per_gpu:6924.668370821682 
Step 38 | loss:0.0005276959855109453 lr:9.917857354066931e-05 tokens_per_second_per_gpu:7064.710956998861 
Step 39 | loss:4.42290365754161e-05 lr:9.904775776745958e-05 tokens_per_second_per_gpu:7088.084264304419 
Step 40 | loss:0.002827366581186652 lr:9.890738003669029e-05 tokens_per_second_per_gpu:6966.313383062993 
Step 41 | loss:3.2591353829047875e-06 lr:9.875746771527816e-05 tokens_per_second_per_gpu:7104.248864905117 
Step 42 | loss:4.0894756239140406e-05 lr:9.859805002892732e-05 tokens_per_second_per_gpu:1164.9711553193358 
Step 43 | loss:2.042909954980132e-06 lr:9.842915805643155e-05 tokens_per_second_per_gpu:7025.651422634611 
Step 44 | loss:5.1866086323570926e-06 lr:9.825082472361557e-05 tokens_per_second_per_gpu:6935.558790612978 
Step 45 | loss:1.643649852667295e-06 lr:9.806308479691595e-05 tokens_per_second_per_gpu:7007.716467709001 
Step 46 | loss:4.22128823629464e-06 lr:9.786597487660337e-05 tokens_per_second_per_gpu:7030.722281930516 
Step 47 | loss:1.5117912880668882e-05 lr:9.765953338964735e-05 tokens_per_second_per_gpu:6557.16775418851 
Step 48 | loss:1.2553706255857833e-05 lr:9.744380058222483e-05 tokens_per_second_per_gpu:6638.369950492554 
Step 49 | loss:7.3374003477511e-06 lr:9.721881851187406e-05 tokens_per_second_per_gpu:1663.003343042255 
Step 50 | loss:2.4675407985341735e-05 lr:9.698463103929542e-05 tokens_per_second_per_gpu:7089.915264859427 
Step 51 | loss:9.96050403045956e-06 lr:9.674128381980072e-05 tokens_per_second_per_gpu:7138.593417947804 
Step 52 | loss:0.00020252492686267942 lr:9.648882429441257e-05 tokens_per_second_per_gpu:7093.839654869536 
Step 53 | loss:0.09148562699556351 lr:9.622730168061567e-05 tokens_per_second_per_gpu:6875.4494319134465 
Step 54 | loss:1.147508191934321e-05 lr:9.595676696276172e-05 tokens_per_second_per_gpu:7093.948313889124 
Step 55 | loss:2.533208862587344e-05 lr:9.567727288213005e-05 tokens_per_second_per_gpu:6897.8826465700495 
Step 56 | loss:6.397398919943953e-06 lr:9.538887392664544e-05 tokens_per_second_per_gpu:6289.281784075682 
Step 57 | loss:4.083194289705716e-05 lr:9.50916263202557e-05 tokens_per_second_per_gpu:6786.677966970842 
Step 58 | loss:7.382668172795093e-06 lr:9.478558801197065e-05 tokens_per_second_per_gpu:7149.280927403911 
Step 59 | loss:3.2811499295348767e-06 lr:9.447081866456489e-05 tokens_per_second_per_gpu:7141.383111775478 
Step 60 | loss:6.9066634750925e-05 lr:9.414737964294636e-05 tokens_per_second_per_gpu:6513.9318694201775 
Step 61 | loss:1.410721324646147e-05 lr:9.381533400219318e-05 tokens_per_second_per_gpu:3183.585154764245 
Step 62 | loss:1.873036308097653e-05 lr:9.347474647526095e-05 tokens_per_second_per_gpu:7024.102742524136 
Step 63 | loss:3.535393625497818e-05 lr:9.312568346036288e-05 tokens_per_second_per_gpu:6920.730649585629 
Step 64 | loss:6.71536399750039e-05 lr:9.276821300802534e-05 tokens_per_second_per_gpu:6870.022287696528 
Step 65 | loss:2.3371201677946374e-05 lr:9.24024048078213e-05 tokens_per_second_per_gpu:7102.3208098111545 
Step 66 | loss:4.80350126963458e-06 lr:9.202833017478422e-05 tokens_per_second_per_gpu:7149.835815077596 
Step 67 | loss:8.619115760666318e-06 lr:9.164606203550497e-05 tokens_per_second_per_gpu:7067.735036527763 
Step 68 | loss:0.001971589867025614 lr:9.125567491391476e-05 tokens_per_second_per_gpu:7170.884678860327 
Step 69 | loss:2.3260561647475697e-05 lr:9.085724491675642e-05 tokens_per_second_per_gpu:7088.57508957724 
Step 70 | loss:0.07770252227783203 lr:9.045084971874738e-05 tokens_per_second_per_gpu:6995.339749223063 
Step 71 | loss:3.7465426430571824e-05 lr:9.003656854743667e-05 tokens_per_second_per_gpu:6961.616681131454 
Step 72 | loss:5.002773832529783e-05 lr:8.961448216775954e-05 tokens_per_second_per_gpu:6752.329964849588 
Step 73 | loss:1.054703079717001e-05 lr:8.9184672866292e-05 tokens_per_second_per_gpu:7141.04100419206 
Step 74 | loss:0.06129450723528862 lr:8.874722443520899e-05 tokens_per_second_per_gpu:6972.27620878167 
Step 75 | loss:4.6133030991768464e-05 lr:8.83022221559489e-05 tokens_per_second_per_gpu:7008.612537455016 
Step 76 | loss:4.6424112952081487e-05 lr:8.784975278258783e-05 tokens_per_second_per_gpu:7025.140425388787 
Step 77 | loss:0.0019122450612485409 lr:8.73899045249266e-05 tokens_per_second_per_gpu:6539.73691951206 
Step 78 | loss:1.744095243338961e-05 lr:8.692276703129421e-05 tokens_per_second_per_gpu:7112.092177721466 
Step 79 | loss:4.13628549722489e-05 lr:8.644843137107059e-05 tokens_per_second_per_gpu:6536.868072506976 
Step 80 | loss:6.11851064604707e-05 lr:8.596699001693255e-05 tokens_per_second_per_gpu:6961.497704848605 
Step 81 | loss:1.2528754268714692e-05 lr:8.547853682682604e-05 tokens_per_second_per_gpu:7086.960101134971 
Step 82 | loss:1.1446889402577654e-05 lr:8.498316702566828e-05 tokens_per_second_per_gpu:6691.280208245457 
Step 83 | loss:4.815257852897048e-06 lr:8.44809771867835e-05 tokens_per_second_per_gpu:6658.248403221658 
Step 84 | loss:3.0488231459457893e-06 lr:8.397206521307584e-05 tokens_per_second_per_gpu:6978.3308970377675 
Step 85 | loss:1.2022415830870159e-05 lr:8.345653031794292e-05 tokens_per_second_per_gpu:7024.04356118677 
Step 86 | loss:9.813477845455054e-06 lr:8.293447300593402e-05 tokens_per_second_per_gpu:6766.809768733323 
Step 87 | loss:2.6369045372121036e-05 lr:8.240599505315655e-05 tokens_per_second_per_gpu:6978.200935022428 
Step 88 | loss:8.541922397853341e-06 lr:8.18711994874345e-05 tokens_per_second_per_gpu:7098.156734459604 
Step 89 | loss:3.103864855802385e-06 lr:8.133019056822304e-05 tokens_per_second_per_gpu:7219.056551161025 
Step 90 | loss:3.5655316423799377e-06 lr:8.07830737662829e-05 tokens_per_second_per_gpu:7019.067698524688 
Step 91 | loss:1.0854146239580587e-06 lr:8.022995574311876e-05 tokens_per_second_per_gpu:7153.69747754744 
Step 92 | loss:4.263146365701687e-06 lr:7.967094433018508e-05 tokens_per_second_per_gpu:6957.553828013221 
Step 93 | loss:7.060260486468906e-06 lr:7.910614850786448e-05 tokens_per_second_per_gpu:6853.263305103832 
Step 94 | loss:1.1903109225386288e-05 lr:7.85356783842216e-05 tokens_per_second_per_gpu:6589.575326434221 
Step 95 | loss:0.00016185850836336613 lr:7.795964517353735e-05 tokens_per_second_per_gpu:6944.601303606873 
Step 96 | loss:8.044984861044213e-05 lr:7.737816117462752e-05 tokens_per_second_per_gpu:6884.864612321647 
Step 97 | loss:7.145414656406501e-06 lr:7.679133974894983e-05 tokens_per_second_per_gpu:6651.570890606297 
Step 98 | loss:9.237920494342688e-06 lr:7.619929529850397e-05 tokens_per_second_per_gpu:6981.50227936076 
Step 99 | loss:1.2111839168937877e-05 lr:7.560214324352858e-05 tokens_per_second_per_gpu:7168.252572265034 
Step 100 | loss:0.026117179542779922 lr:7.500000000000001e-05 tokens_per_second_per_gpu:7081.326389342591 
Step 101 | loss:0.0802021324634552 lr:7.439298295693665e-05 tokens_per_second_per_gpu:6970.406976143403 
Step 102 | loss:1.909429329316481e-06 lr:7.378121045351378e-05 tokens_per_second_per_gpu:6981.964998585159 
Step 103 | loss:3.795558086494566e-06 lr:7.316480175599309e-05 tokens_per_second_per_gpu:7068.4476399981295 
Step 104 | loss:2.2362472009263e-05 lr:7.254387703447154e-05 tokens_per_second_per_gpu:6983.714695167926 
Step 105 | loss:7.627813465660438e-05 lr:7.191855733945387e-05 tokens_per_second_per_gpu:6743.04177836767 
Step 106 | loss:1.7890037270262837e-05 lr:7.128896457825364e-05 tokens_per_second_per_gpu:7010.008610179187 
Step 107 | loss:7.333755092986394e-06 lr:7.06552214912271e-05 tokens_per_second_per_gpu:7180.622708674705 
Step 108 | loss:7.34656787244603e-05 lr:7.001745162784477e-05 tokens_per_second_per_gpu:6715.895583278378 
Step 109 | loss:0.04645148292183876 lr:6.937577932260515e-05 tokens_per_second_per_gpu:6988.207653074747 
Step 110 | loss:2.43443064391613e-05 lr:6.873032967079561e-05 tokens_per_second_per_gpu:6956.44202161727 
Step 111 | loss:6.660214239673223e-06 lr:6.808122850410461e-05 tokens_per_second_per_gpu:6822.459221804685 
Step 112 | loss:0.012756402604281902 lr:6.742860236609077e-05 tokens_per_second_per_gpu:7100.676232373181 
Step 113 | loss:1.7311773262917995e-05 lr:6.677257848751277e-05 tokens_per_second_per_gpu:7156.757410833997 
Step 114 | loss:3.133984137093648e-05 lr:6.611328476152557e-05 tokens_per_second_per_gpu:7024.579430239129 
Step 115 | loss:1.045593398885103e-05 lr:6.545084971874738e-05 tokens_per_second_per_gpu:7132.709611658199 
Step 116 | loss:2.3038093786453828e-05 lr:6.478540250220234e-05 tokens_per_second_per_gpu:6551.364179401075 
Step 117 | loss:4.244555384502746e-05 lr:6.411707284214384e-05 tokens_per_second_per_gpu:6972.92108119478 
Step 118 | loss:4.624223493010504e-06 lr:6.344599103076329e-05 tokens_per_second_per_gpu:7040.4981318665305 
Step 119 | loss:1.8302846001461148e-05 lr:6.277228789678953e-05 tokens_per_second_per_gpu:7035.868734705393 
Step 120 | loss:1.2434774134817417e-06 lr:6.209609477998338e-05 tokens_per_second_per_gpu:6995.260130722958 
Step 121 | loss:0.059504956007003784 lr:6.141754350553279e-05 tokens_per_second_per_gpu:6470.644661102764 
Step 122 | loss:0.06491203606128693 lr:6.073676635835317e-05 tokens_per_second_per_gpu:6572.499955115313 
Step 123 | loss:0.0078155267983675 lr:6.005389605729824e-05 tokens_per_second_per_gpu:7122.328916332964 
Step 124 | loss:3.2552068205404794e-06 lr:5.9369065729286245e-05 tokens_per_second_per_gpu:6979.690909126259 
Step 125 | loss:2.347284316783771e-05 lr:5.868240888334653e-05 tokens_per_second_per_gpu:1226.3605470349123 
Step 126 | loss:6.7143696469429415e-06 lr:5.799405938459175e-05 tokens_per_second_per_gpu:7054.351458698333 
Step 127 | loss:8.008167242223863e-06 lr:5.730415142812059e-05 tokens_per_second_per_gpu:7079.00426628037 
Step 128 | loss:1.265558512386633e-05 lr:5.661281951285613e-05 tokens_per_second_per_gpu:6978.817305625372 
Step 129 | loss:5.403888280852698e-06 lr:5.5920198415325064e-05 tokens_per_second_per_gpu:7116.884844031205 
Step 130 | loss:3.3160192742798245e-06 lr:5.522642316338268e-05 tokens_per_second_per_gpu:7170.247458165003 
Step 131 | loss:0.03811214491724968 lr:5.453162900988902e-05 tokens_per_second_per_gpu:6532.210400971212 
Step 132 | loss:9.247117304767016e-06 lr:5.383595140634093e-05 tokens_per_second_per_gpu:7168.145254774769 
Step 133 | loss:2.4717032829357777e-06 lr:5.313952597646568e-05 tokens_per_second_per_gpu:7091.0692874879815 
Step 134 | loss:7.369029481196776e-05 lr:5.244248848978067e-05 tokens_per_second_per_gpu:7004.214080065665 
Step 135 | loss:3.1085594400792615e-06 lr:5.174497483512506e-05 tokens_per_second_per_gpu:6990.065421926794 
Step 136 | loss:9.577012178851874e-07 lr:5.104712099416785e-05 tokens_per_second_per_gpu:7008.389750168799 
Step 137 | loss:1.0971440133289434e-05 lr:5.034906301489808e-05 tokens_per_second_per_gpu:6928.087081095463 
Step 138 | loss:4.204719516565092e-06 lr:4.965093698510193e-05 tokens_per_second_per_gpu:6909.812329064061 
Step 139 | loss:1.0095306606672239e-05 lr:4.895287900583216e-05 tokens_per_second_per_gpu:7073.22815454305 
Step 140 | loss:3.268487853347324e-05 lr:4.825502516487497e-05 tokens_per_second_per_gpu:5531.814526502013 
Step 141 | loss:0.04684333875775337 lr:4.755751151021934e-05 tokens_per_second_per_gpu:1222.5209777558305 
Step 142 | loss:0.007246284279972315 lr:4.6860474023534335e-05 tokens_per_second_per_gpu:7069.36560305965 
Step 143 | loss:0.0004771423991769552 lr:4.616404859365907e-05 tokens_per_second_per_gpu:6842.861539922387 
Step 144 | loss:1.924601019709371e-05 lr:4.5468370990111006e-05 tokens_per_second_per_gpu:6604.936550883388 
Step 145 | loss:0.0027290782891213894 lr:4.477357683661734e-05 tokens_per_second_per_gpu:7107.98693950717 
Step 146 | loss:1.3675327181772445e-06 lr:4.407980158467495e-05 tokens_per_second_per_gpu:7070.146281288021 
Step 147 | loss:1.4125947700449615e-06 lr:4.3387180487143876e-05 tokens_per_second_per_gpu:7067.684661770268 
Step 148 | loss:4.665956112148706e-06 lr:4.269584857187943e-05 tokens_per_second_per_gpu:6923.578320260167 
Step 149 | loss:1.6779952147771837e-06 lr:4.2005940615408264e-05 tokens_per_second_per_gpu:7082.018376457293 
Step 150 | loss:0.0056058503687381744 lr:4.131759111665349e-05 tokens_per_second_per_gpu:7219.916012532959 
Step 151 | loss:5.618292561848648e-06 lr:4.063093427071376e-05 tokens_per_second_per_gpu:7145.512686855166 
Step 152 | loss:1.7041573983078706e-06 lr:3.9946103942701777e-05 tokens_per_second_per_gpu:7097.425317714264 
Step 153 | loss:5.979360139463097e-06 lr:3.926323364164684e-05 tokens_per_second_per_gpu:6643.699919698053 
Step 154 | loss:2.7793234949058387e-06 lr:3.858245649446721e-05 tokens_per_second_per_gpu:7055.825278983358 
Step 155 | loss:5.22729760632501e-06 lr:3.790390522001662e-05 tokens_per_second_per_gpu:7009.005737334563 
Step 156 | loss:2.1051121166237863e-06 lr:3.7227712103210486e-05 tokens_per_second_per_gpu:7026.554489305066 
Step 157 | loss:6.214169161466998e-07 lr:3.655400896923672e-05 tokens_per_second_per_gpu:7142.538870029408 
Step 158 | loss:1.981428567887633e-06 lr:3.588292715785617e-05 tokens_per_second_per_gpu:7035.408645586714 
Step 159 | loss:1.229327267537883e-06 lr:3.5214597497797684e-05 tokens_per_second_per_gpu:7116.95198366537 
Step 160 | loss:0.00840386375784874 lr:3.4549150281252636e-05 tokens_per_second_per_gpu:7011.8687130323815 
Step 161 | loss:4.690188688982744e-06 lr:3.388671523847445e-05 tokens_per_second_per_gpu:6973.663980687629 
Step 162 | loss:1.0637126024448662e-06 lr:3.322742151248725e-05 tokens_per_second_per_gpu:7037.310787111708 
Step 163 | loss:2.723348643485224e-06 lr:3.257139763390925e-05 tokens_per_second_per_gpu:6965.671214397283 
Step 164 | loss:3.37466713062895e-06 lr:3.1918771495895396e-05 tokens_per_second_per_gpu:7002.487856209016 
Step 165 | loss:1.709984417175292e-06 lr:3.12696703292044e-05 tokens_per_second_per_gpu:6973.210713786478 
Step 166 | loss:4.1995344872702844e-06 lr:3.062422067739485e-05 tokens_per_second_per_gpu:7116.373478596367 
Step 167 | loss:2.869960189855192e-05 lr:2.9982548372155263e-05 tokens_per_second_per_gpu:7169.540838190056 
Step 168 | loss:5.113638962939149e-06 lr:2.934477850877292e-05 tokens_per_second_per_gpu:7109.270070429179 
Step 169 | loss:1.845184783633158e-06 lr:2.8711035421746367e-05 tokens_per_second_per_gpu:6897.6678713681085 
Step 170 | loss:7.516128789575305e-06 lr:2.8081442660546125e-05 tokens_per_second_per_gpu:6839.149029652966 
Step 171 | loss:2.8925494461873313e-06 lr:2.7456122965528475e-05 tokens_per_second_per_gpu:7031.877241122064 
Step 172 | loss:7.430535333696753e-07 lr:2.6835198244006927e-05 tokens_per_second_per_gpu:7065.385613955569 
Step 173 | loss:2.773562755464809e-06 lr:2.6218789546486234e-05 tokens_per_second_per_gpu:7121.649218064128 
Step 174 | loss:1.5943980997690232e-06 lr:2.560701704306336e-05 tokens_per_second_per_gpu:7043.376839781155 
Step 175 | loss:2.5899310003296705e-06 lr:2.500000000000001e-05 tokens_per_second_per_gpu:6978.686237887583 
Step 176 | loss:3.070770162594272e-06 lr:2.4397856756471432e-05 tokens_per_second_per_gpu:7021.193638604272 
Step 177 | loss:2.501697053958196e-06 lr:2.3800704701496053e-05 tokens_per_second_per_gpu:6935.321589001509 
Step 178 | loss:9.196894552587764e-07 lr:2.3208660251050158e-05 tokens_per_second_per_gpu:7113.116177287278 
Step 179 | loss:1.4915924566594185e-06 lr:2.2621838825372493e-05 tokens_per_second_per_gpu:7080.012930507778 
Step 180 | loss:7.816337529220618e-06 lr:2.2040354826462668e-05 tokens_per_second_per_gpu:6730.247387924899 
Step 181 | loss:4.5060451157041825e-06 lr:2.1464321615778422e-05 tokens_per_second_per_gpu:6633.126456234147 
Step 182 | loss:9.429815008843434e-07 lr:2.0893851492135537e-05 tokens_per_second_per_gpu:6876.042804461826 
Step 183 | loss:1.7461517245465075e-06 lr:2.0329055669814934e-05 tokens_per_second_per_gpu:7088.35117352931 
Step 184 | loss:8.307407028951275e-07 lr:1.977004425688126e-05 tokens_per_second_per_gpu:7043.547922215887 
Step 185 | loss:4.707547304860782e-06 lr:1.9216926233717085e-05 tokens_per_second_per_gpu:6643.8900988303285 
Step 186 | loss:1.0972149539156817e-05 lr:1.866980943177699e-05 tokens_per_second_per_gpu:6694.705107240203 
Step 187 | loss:4.4071919546695426e-05 lr:1.8128800512565513e-05 tokens_per_second_per_gpu:7240.146015342119 
Step 188 | loss:8.381610427932173e-07 lr:1.7594004946843456e-05 tokens_per_second_per_gpu:7096.8964979719885 
Step 189 | loss:7.407321049868187e-07 lr:1.7065526994065973e-05 tokens_per_second_per_gpu:7121.173756895708 
Step 190 | loss:3.3338999401166802e-06 lr:1.6543469682057106e-05 tokens_per_second_per_gpu:7100.223662186301 
Step 191 | loss:0.0035902601666748524 lr:1.602793478692419e-05 tokens_per_second_per_gpu:6515.473321335062 
Step 192 | loss:2.5614795049477834e-06 lr:1.551902281321651e-05 tokens_per_second_per_gpu:7003.541912899192 
Step 193 | loss:8.393068924306135e-07 lr:1.5016832974331724e-05 tokens_per_second_per_gpu:7014.491032799716 
Step 194 | loss:2.447998213028768e-06 lr:1.4521463173173965e-05 tokens_per_second_per_gpu:6708.696099311401 
Step 195 | loss:5.267979759082664e-06 lr:1.4033009983067452e-05 tokens_per_second_per_gpu:7116.479522540485 
Step 196 | loss:2.3782727112120483e-06 lr:1.3551568628929434e-05 tokens_per_second_per_gpu:7062.6359533488985 
Step 197 | loss:2.5765357349882834e-06 lr:1.3077232968705805e-05 tokens_per_second_per_gpu:6958.638851056429 
Step 198 | loss:1.9566266473702854e-06 lr:1.2610095475073414e-05 tokens_per_second_per_gpu:6180.3336310184695 
Step 199 | loss:1.4055506198928924e-06 lr:1.2150247217412186e-05 tokens_per_second_per_gpu:7024.581093181509 
Step 200 | loss:0.00019912575953640044 lr:1.1697777844051105e-05 tokens_per_second_per_gpu:7168.688028092893 
Step 201 | loss:1.6661166455378407e-06 lr:1.1252775564791024e-05 tokens_per_second_per_gpu:6929.813512350013 
Step 202 | loss:1.5410813603011775e-06 lr:1.0815327133708015e-05 tokens_per_second_per_gpu:6987.44783037789 
Step 203 | loss:4.979927325621247e-06 lr:1.0385517832240471e-05 tokens_per_second_per_gpu:6979.381617208856 
Step 204 | loss:1.2297186913201585e-06 lr:9.963431452563332e-06 tokens_per_second_per_gpu:7176.665234411136 
Step 205 | loss:1.3492949619831052e-05 lr:9.549150281252633e-06 tokens_per_second_per_gpu:7127.822007721629 
Step 206 | loss:8.906470156944124e-07 lr:9.142755083243576e-06 tokens_per_second_per_gpu:7210.724540147423 
Step 207 | loss:8.889155083124933e-07 lr:8.744325086085248e-06 tokens_per_second_per_gpu:6972.105740048867 
Step 208 | loss:8.329041634169698e-07 lr:8.353937964495029e-06 tokens_per_second_per_gpu:7106.554468682579 
Step 209 | loss:3.670160140245571e-06 lr:7.971669825215788e-06 tokens_per_second_per_gpu:7220.885235213336 
Step 210 | loss:5.802224222861696e-06 lr:7.597595192178702e-06 tokens_per_second_per_gpu:6605.317312538336 
Step 211 | loss:7.749559358671831e-07 lr:7.2317869919746705e-06 tokens_per_second_per_gpu:7032.812938997669 
Step 212 | loss:7.33648221284966e-06 lr:6.874316539637127e-06 tokens_per_second_per_gpu:7166.742192161262 
Step 213 | loss:6.916959591762861e-06 lr:6.52525352473905e-06 tokens_per_second_per_gpu:7222.582898447143 
Step 214 | loss:1.3800332681057625e-06 lr:6.184665997806832e-06 tokens_per_second_per_gpu:7143.656007254907 
Step 215 | loss:2.8561623821588e-06 lr:5.852620357053651e-06 tokens_per_second_per_gpu:7218.261854220109 
Step 216 | loss:2.382758566454868e-06 lr:5.529181335435124e-06 tokens_per_second_per_gpu:7259.21964949639 
Step 217 | loss:2.0544837298075436e-06 lr:5.214411988029355e-06 tokens_per_second_per_gpu:6678.5120598161 
Step 218 | loss:7.685586751904339e-06 lr:4.908373679744316e-06 tokens_per_second_per_gpu:6626.349056250582 
Step 219 | loss:6.007870524626924e-06 lr:4.611126073354571e-06 tokens_per_second_per_gpu:7077.564345481776 
Step 220 | loss:3.490607195999473e-05 lr:4.322727117869951e-06 tokens_per_second_per_gpu:7105.257936437791 
Step 221 | loss:6.473839675891213e-06 lr:4.043233037238281e-06 tokens_per_second_per_gpu:6574.786620635736 
Step 222 | loss:9.190073910758656e-07 lr:3.772698319384349e-06 tokens_per_second_per_gpu:7110.835667763218 
Step 223 | loss:3.068495061597787e-05 lr:3.511175705587433e-06 tokens_per_second_per_gpu:7237.129676743387 
Step 224 | loss:0.0015145567012950778 lr:3.258716180199278e-06 tokens_per_second_per_gpu:7148.635382871293 
Step 225 | loss:1.0374400289947516e-06 lr:3.0153689607045845e-06 tokens_per_second_per_gpu:6931.239539572596 
Step 226 | loss:1.3044374099990819e-05 lr:2.7811814881259503e-06 tokens_per_second_per_gpu:7222.589120266079 
Step 227 | loss:3.997599196736701e-05 lr:2.5561994177751737e-06 tokens_per_second_per_gpu:6805.225223179514 
Step 228 | loss:2.5553501927788602e-06 lr:2.340466610352654e-06 tokens_per_second_per_gpu:7050.79917922558 
Step 229 | loss:2.0318548195064068e-06 lr:2.134025123396638e-06 tokens_per_second_per_gpu:7133.6810033473275 
Step 230 | loss:3.323497367091477e-06 lr:1.9369152030840556e-06 tokens_per_second_per_gpu:6806.544235688607 
Step 231 | loss:3.3078231354011223e-06 lr:1.7491752763844293e-06 tokens_per_second_per_gpu:6575.355492388719 
Step 232 | loss:1.2345973345873062e-06 lr:1.5708419435684462e-06 tokens_per_second_per_gpu:7055.031265155706 
Step 233 | loss:7.690464940424135e-07 lr:1.4019499710726913e-06 tokens_per_second_per_gpu:7078.248774510847 
Step 234 | loss:2.586485607025679e-06 lr:1.2425322847218368e-06 tokens_per_second_per_gpu:7032.57973473013 
Step 235 | loss:4.3849086068803445e-06 lr:1.0926199633097157e-06 tokens_per_second_per_gpu:7029.305177581747 
Step 236 | loss:8.413181831201655e-07 lr:9.522422325404235e-07 tokens_per_second_per_gpu:7154.23857710486 
Step 237 | loss:1.4026572898728773e-06 lr:8.214264593307098e-07 tokens_per_second_per_gpu:7110.085257093312 
Step 238 | loss:7.505981557187624e-06 lr:7.001981464747565e-07 tokens_per_second_per_gpu:6492.374810946618 
Step 239 | loss:1.034242700370669e-06 lr:5.885809276723608e-07 tokens_per_second_per_gpu:7155.08391999291 
Step 240 | loss:7.5563552854873706e-06 lr:4.865965629214819e-07 tokens_per_second_per_gpu:7234.350878362913 
Step 241 | loss:0.011282374151051044 lr:3.9426493427611177e-07 tokens_per_second_per_gpu:7003.134545628831 
Step 242 | loss:2.187658310504048e-06 lr:3.1160404197018154e-07 tokens_per_second_per_gpu:7034.06735918719 
Step 243 | loss:1.2806139011445339e-06 lr:2.386300009084408e-07 tokens_per_second_per_gpu:7085.225395108095 
Step 244 | loss:6.366369120769377e-07 lr:1.753570375247815e-07 tokens_per_second_per_gpu:6865.731239141299 
Step 245 | loss:2.370304173382465e-06 lr:1.2179748700879012e-07 tokens_per_second_per_gpu:6986.651892281518 
Step 246 | loss:1.0725663059929502e-06 lr:7.796179090094891e-08 tokens_per_second_per_gpu:7068.680235913791 
Step 247 | loss:1.5921763178994297e-06 lr:4.385849505708084e-08 tokens_per_second_per_gpu:7031.133175520725 
Step 248 | loss:0.0008751447894610465 lr:1.949424798228239e-08 tokens_per_second_per_gpu:7130.958260467186 
Step 249 | loss:0.010600603185594082 lr:4.873799534788059e-09 tokens_per_second_per_gpu:6802.101333548679 
Step 250 | loss:2.739674073382048e-06 lr:0.0 tokens_per_second_per_gpu:6964.726939557889