mmenuu commited on
Commit
25fb141
1 Parent(s): 02b7bae

Upload 12 files

Browse files
.gitattributes CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  checkpoint-8100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
 
 
34
  *.zst filter=lfs diff=lfs merge=lfs -text
35
  *tfevents* filter=lfs diff=lfs merge=lfs -text
36
  checkpoint-8100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
37
+ tokenizer.json filter=lfs diff=lfs merge=lfs -text
added_tokens.json ADDED
@@ -0,0 +1,837 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "‼": 248733,
3
+ "⁉": 248850,
4
+ "⌛": 248707,
5
+ "⌨": 248450,
6
+ "⏏": 248572,
7
+ "⏫": 248890,
8
+ "⏭": 249212,
9
+ "⏮": 248920,
10
+ "⏯": 248576,
11
+ "⏲": 248922,
12
+ "⏸": 249193,
13
+ "⏹": 249213,
14
+ "Ⓜ": 248957,
15
+ "♉": 248638,
16
+ "♊": 248741,
17
+ "♌": 249062,
18
+ "♍": 248609,
19
+ "♎": 248611,
20
+ "♏": 249129,
21
+ "♑": 248994,
22
+ "♓": 248790,
23
+ "♾": 248725,
24
+ "♿": 248454,
25
+ "⚒": 248845,
26
+ "⚖": 248534,
27
+ "⚗": 248943,
28
+ "⚙": 248676,
29
+ "⚛": 248761,
30
+ "⚧": 248971,
31
+ "⚰": 248930,
32
+ "⚱": 249010,
33
+ "⛈": 249232,
34
+ "⛎": 248516,
35
+ "⛏": 248490,
36
+ "⛑": 248575,
37
+ "⛩": 248953,
38
+ "⛪": 249120,
39
+ "⛲": 249141,
40
+ "⛴": 248779,
41
+ "⛷": 248649,
42
+ "⛸": 248693,
43
+ "⛹": 249260,
44
+ "⛽": 249248,
45
+ "✝": 248664,
46
+ "➗": 248803,
47
+ "➰": 248665,
48
+ "➿": 248458,
49
+ "⬛": 249000,
50
+ "⬜": 248821,
51
+ "〽": 248791,
52
+ "㊗": 248521,
53
+ "㊙": 249060,
54
+ "🀄": 248929,
55
+ "🃏": 248468,
56
+ "🅿": 248755,
57
+ "🆎": 248619,
58
+ "🆒": 248798,
59
+ "🆕": 248655,
60
+ "🆖": 248837,
61
+ "🆗": 248917,
62
+ "🆙": 248978,
63
+ "🆚": 248972,
64
+ "🈁": 248495,
65
+ "🈂": 248844,
66
+ "🈚": 249053,
67
+ "🈯": 248568,
68
+ "🈲": 248512,
69
+ "🈳": 248785,
70
+ "🈴": 249003,
71
+ "🈵": 248752,
72
+ "🈶": 248502,
73
+ "🈷": 249153,
74
+ "🈸": 249079,
75
+ "🈹": 248690,
76
+ "🈺": 248472,
77
+ "🉐": 249070,
78
+ "🉑": 249224,
79
+ "🌁": 248694,
80
+ "🌂": 248584,
81
+ "🌃": 248739,
82
+ "🌇": 248528,
83
+ "🌉": 248964,
84
+ "🌌": 248587,
85
+ "🌑": 249069,
86
+ "🌒": 249187,
87
+ "🌓": 248598,
88
+ "🌔": 248514,
89
+ "🌕": 248452,
90
+ "🌖": 249242,
91
+ "🌗": 248478,
92
+ "🌘": 248782,
93
+ "🌚": 249190,
94
+ "🌛": 248984,
95
+ "🌜": 248635,
96
+ "🌠": 248623,
97
+ "🌡": 248904,
98
+ "🌤": 248812,
99
+ "🌥": 248881,
100
+ "🌦": 249172,
101
+ "🌧": 248666,
102
+ "🌨": 248730,
103
+ "🌩": 248795,
104
+ "🌪": 249022,
105
+ "🌫": 248734,
106
+ "🌬": 248530,
107
+ "🌭": 248868,
108
+ "🌮": 248976,
109
+ "🌯": 248716,
110
+ "🌰": 248872,
111
+ "🌵": 249081,
112
+ "🍈": 249173,
113
+ "🍐": 248933,
114
+ "🍖": 248742,
115
+ "🍗": 248433,
116
+ "🍘": 248647,
117
+ "🍙": 249057,
118
+ "🍛": 248869,
119
+ "🍜": 248979,
120
+ "🍝": 249087,
121
+ "🍞": 248697,
122
+ "🍟": 248748,
123
+ "🍠": 248735,
124
+ "🍡": 248591,
125
+ "🍢": 248833,
126
+ "🍣": 248744,
127
+ "🍤": 248777,
128
+ "🍥": 248722,
129
+ "🍧": 248692,
130
+ "🍩": 248935,
131
+ "🍭": 248923,
132
+ "🍮": 249067,
133
+ "🍱": 248715,
134
+ "🍵": 248873,
135
+ "🍶": 248814,
136
+ "🍸": 249251,
137
+ "🍼": 248650,
138
+ "🎃": 248736,
139
+ "🎆": 248678,
140
+ "🎇": 248589,
141
+ "🎌": 248805,
142
+ "🎍": 248892,
143
+ "🎎": 249231,
144
+ "🎏": 248670,
145
+ "🎐": 249014,
146
+ "🎑": 248954,
147
+ "🎒": 249072,
148
+ "🎚": 249103,
149
+ "🎛": 248704,
150
+ "🎟": 248802,
151
+ "🎠": 249005,
152
+ "🎡": 248932,
153
+ "🎣": 249137,
154
+ "🎦": 248858,
155
+ "🎪": 249086,
156
+ "🎫": 249258,
157
+ "🎰": 248683,
158
+ "🎱": 248961,
159
+ "🎲": 248455,
160
+ "🎳": 249073,
161
+ "🎴": 248769,
162
+ "🎷": 248451,
163
+ "🎹": 248503,
164
+ "🎺": 248586,
165
+ "🎻": 249148,
166
+ "🎽": 248577,
167
+ "🎾": 249108,
168
+ "🎿": 248915,
169
+ "🏁": 249031,
170
+ "🏂": 248792,
171
+ "🏅": 249063,
172
+ "🏇": 248612,
173
+ "🏈": 248891,
174
+ "🏉": 248434,
175
+ "🏊": 249052,
176
+ "🏍": 248525,
177
+ "🏎": 248992,
178
+ "🏏": 248515,
179
+ "🏐": 248660,
180
+ "🏑": 249144,
181
+ "🏒": 248866,
182
+ "🏓": 248459,
183
+ "🏔": 248588,
184
+ "🏕": 248916,
185
+ "🏗": 249098,
186
+ "🏘": 249199,
187
+ "🏙": 248567,
188
+ "🏚": 248888,
189
+ "🏛": 248914,
190
+ "🏜": 248685,
191
+ "🏞": 249200,
192
+ "🏟": 248815,
193
+ "🏣": 248828,
194
+ "🏤": 248773,
195
+ "🏥": 248835,
196
+ "🏦": 248553,
197
+ "🏧": 248642,
198
+ "🏨": 248561,
199
+ "🏩": 249182,
200
+ "🏪": 248999,
201
+ "🏬": 248996,
202
+ "🏭": 248934,
203
+ "🏮": 248644,
204
+ "🏯": 249175,
205
+ "🏰": 248774,
206
+ "🏴": 248501,
207
+ "🏷": 248427,
208
+ "🏸": 248804,
209
+ "🏹": 249130,
210
+ "🏺": 249250,
211
+ "🐀": 249257,
212
+ "🐁": 249252,
213
+ "🐂": 248621,
214
+ "🐃": 248695,
215
+ "🐄": 249194,
216
+ "🐅": 249117,
217
+ "🐆": 249026,
218
+ "🐇": 249015,
219
+ "🐈": 248527,
220
+ "🐉": 248462,
221
+ "🐊": 248574,
222
+ "🐋": 249208,
223
+ "🐌": 248880,
224
+ "🐎": 248479,
225
+ "🐏": 248865,
226
+ "🐐": 249198,
227
+ "🐑": 249122,
228
+ "🐓": 248680,
229
+ "🐔": 249068,
230
+ "🐕": 248756,
231
+ "🐖": 248603,
232
+ "🐗": 248750,
233
+ "🐙": 248701,
234
+ "🐚": 248886,
235
+ "🐛": 248540,
236
+ "🐜": 249249,
237
+ "🐟": 248912,
238
+ "🐠": 249253,
239
+ "🐡": 248480,
240
+ "🐢": 249210,
241
+ "🐧": 249254,
242
+ "🐨": 249019,
243
+ "🐩": 248580,
244
+ "🐪": 248775,
245
+ "🐫": 248505,
246
+ "🐬": 249002,
247
+ "🐭": 248615,
248
+ "🐮": 249074,
249
+ "🐵": 248533,
250
+ "🐹": 248441,
251
+ "🐺": 249055,
252
+ "🐽": 248602,
253
+ "🐿": 248823,
254
+ "👂": 249140,
255
+ "👓": 249170,
256
+ "👘": 248542,
257
+ "👛": 249217,
258
+ "👝": 248709,
259
+ "👞": 248728,
260
+ "👡": 248549,
261
+ "👢": 248910,
262
+ "👲": 249001,
263
+ "👴": 248820,
264
+ "👵": 248960,
265
+ "👷": 249146,
266
+ "👹": 249099,
267
+ "👺": 249106,
268
+ "👽": 248883,
269
+ "👾": 249059,
270
+ "💂": 248778,
271
+ "💈": 248554,
272
+ "💒": 248594,
273
+ "💤": 248466,
274
+ "💨": 248913,
275
+ "💩": 248508,
276
+ "💱": 249100,
277
+ "💴": 248931,
278
+ "💷": 248711,
279
+ "💹": 249158,
280
+ "💺": 248831,
281
+ "💽": 249021,
282
+ "💾": 249215,
283
+ "💿": 248604,
284
+ "📀": 248518,
285
+ "📁": 248444,
286
+ "📂": 248496,
287
+ "📄": 248801,
288
+ "📆": 248487,
289
+ "📇": 249007,
290
+ "📈": 248438,
291
+ "📉": 248617,
292
+ "📊": 248473,
293
+ "📏": 248556,
294
+ "📐": 248679,
295
+ "📑": 249256,
296
+ "📒": 248710,
297
+ "📓": 248632,
298
+ "📔": 248909,
299
+ "📗": 248807,
300
+ "📘": 248708,
301
+ "📙": 249160,
302
+ "📛": 248484,
303
+ "📟": 248727,
304
+ "📠": 249065,
305
+ "📡": 248532,
306
+ "📤": 248718,
307
+ "📨": 249196,
308
+ "📪": 248565,
309
+ "📫": 248469,
310
+ "📭": 248998,
311
+ "📯": 249220,
312
+ "📳": 248471,
313
+ "📴": 249004,
314
+ "📵": 249013,
315
+ "📶": 248969,
316
+ "📻": 249084,
317
+ "📼": 249230,
318
+ "📿": 249181,
319
+ "🔀": 248946,
320
+ "🔂": 248863,
321
+ "🔃": 249050,
322
+ "🔄": 248995,
323
+ "🔆": 249046,
324
+ "🔇": 249009,
325
+ "🔈": 248559,
326
+ "🔉": 248590,
327
+ "🔋": 248907,
328
+ "🔌": 248842,
329
+ "🔏": 248622,
330
+ "🔐": 248546,
331
+ "🔑": 248937,
332
+ "🔒": 248498,
333
+ "🔓": 249058,
334
+ "🔕": 248780,
335
+ "🔙": 249206,
336
+ "🔚": 248903,
337
+ "🔛": 248671,
338
+ "🔟": 249189,
339
+ "🔠": 249075,
340
+ "🔡": 248600,
341
+ "🔢": 249142,
342
+ "🔣": 248579,
343
+ "🔤": 248997,
344
+ "🔦": 248813,
345
+ "🔧": 248618,
346
+ "🔨": 248849,
347
+ "🔩": 248781,
348
+ "🔪": 248906,
349
+ "🔭": 249261,
350
+ "🔯": 248674,
351
+ "🔲": 248988,
352
+ "🔳": 248985,
353
+ "🔼": 248754,
354
+ "🔽": 248581,
355
+ "🕋": 249027,
356
+ "🕌": 249241,
357
+ "🕍": 248719,
358
+ "🕎": 248626,
359
+ "🕐": 248855,
360
+ "🕑": 248818,
361
+ "🕒": 248485,
362
+ "🕓": 248465,
363
+ "🕕": 248494,
364
+ "🕖": 248847,
365
+ "🕗": 249221,
366
+ "🕘": 248552,
367
+ "🕙": 248539,
368
+ "🕚": 248607,
369
+ "🕛": 248889,
370
+ "🕜": 248513,
371
+ "🕝": 248763,
372
+ "🕞": 248601,
373
+ "🕟": 248884,
374
+ "🕠": 249109,
375
+ "🕡": 248982,
376
+ "🕢": 248896,
377
+ "🕣": 248854,
378
+ "🕤": 248682,
379
+ "🕥": 248627,
380
+ "🕦": 248938,
381
+ "🕧": 248482,
382
+ "🕰": 248810,
383
+ "🕳": 248700,
384
+ "🕴": 248483,
385
+ "🕶": 248688,
386
+ "🕷": 248437,
387
+ "🕸": 248749,
388
+ "🕹": 248871,
389
+ "🖇": 248563,
390
+ "🖊": 248760,
391
+ "🖌": 249209,
392
+ "🖍": 248560,
393
+ "🖕": 249105,
394
+ "🖖": 248864,
395
+ "🖨": 248947,
396
+ "🖱": 248784,
397
+ "🖲": 248766,
398
+ "🖼": 248506,
399
+ "🗂": 248631,
400
+ "🗃": 248497,
401
+ "🗄": 248980,
402
+ "🗑": 249088,
403
+ "🗒": 249048,
404
+ "🗜": 248764,
405
+ "🗝": 248629,
406
+ "🗞": 248867,
407
+ "🗡": 249134,
408
+ "🗨": 249150,
409
+ "🗯": 248585,
410
+ "🗳": 248461,
411
+ "🗺": 248493,
412
+ "🗻": 249133,
413
+ "🗾": 248464,
414
+ "🗿": 249135,
415
+ "😧": 249227,
416
+ "😸": 248908,
417
+ "😺": 248851,
418
+ "😼": 248614,
419
+ "😽": 249121,
420
+ "😾": 248675,
421
+ "😿": 248796,
422
+ "🙉": 248633,
423
+ "🙍": 249066,
424
+ "🙎": 248808,
425
+ "🚁": 248677,
426
+ "🚂": 248504,
427
+ "🚃": 249184,
428
+ "🚄": 248648,
429
+ "🚅": 249102,
430
+ "🚆": 248974,
431
+ "🚈": 248901,
432
+ "🚉": 248856,
433
+ "🚊": 249178,
434
+ "🚋": 248878,
435
+ "🚍": 249016,
436
+ "🚎": 249163,
437
+ "🚏": 249119,
438
+ "🚐": 248731,
439
+ "🚑": 249188,
440
+ "🚒": 249225,
441
+ "🚓": 248817,
442
+ "🚔": 249259,
443
+ "🚕": 248608,
444
+ "🚖": 248667,
445
+ "🚜": 249076,
446
+ "🚝": 248882,
447
+ "🚞": 248596,
448
+ "🚟": 249154,
449
+ "🚠": 248640,
450
+ "🚡": 249020,
451
+ "🚢": 248547,
452
+ "🚣": 249186,
453
+ "🚤": 249204,
454
+ "🚥": 249164,
455
+ "🚦": 248717,
456
+ "🚧": 249143,
457
+ "🚪": 248861,
458
+ "🚬": 248918,
459
+ "🚭": 248836,
460
+ "🚮": 248583,
461
+ "🚯": 248659,
462
+ "🚰": 249202,
463
+ "🚱": 248786,
464
+ "🚳": 248758,
465
+ "🚷": 249149,
466
+ "🚸": 248550,
467
+ "🚹": 249024,
468
+ "🚺": 249044,
469
+ "🚻": 248636,
470
+ "🚼": 249214,
471
+ "🚽": 248726,
472
+ "🚾": 249205,
473
+ "🚿": 248941,
474
+ "🛀": 249097,
475
+ "🛁": 248430,
476
+ "🛂": 248770,
477
+ "🛃": 248732,
478
+ "🛄": 248879,
479
+ "🛅": 249041,
480
+ "🛋": 249203,
481
+ "🛎": 249113,
482
+ "🛏": 249078,
483
+ "🛐": 248830,
484
+ "🛕": 248859,
485
+ "🛖": 248435,
486
+ "🛗": 248551,
487
+ "🛜": 248519,
488
+ "🛝": 248510,
489
+ "🛞": 248897,
490
+ "🛟": 248981,
491
+ "🛠": 248966,
492
+ "🛡": 248811,
493
+ "🛢": 249240,
494
+ "🛣": 249040,
495
+ "🛤": 248973,
496
+ "🛥": 248436,
497
+ "🛩": 249156,
498
+ "🛬": 248951,
499
+ "🛰": 248656,
500
+ "🛳": 249123,
501
+ "🛴": 249111,
502
+ "🛵": 248857,
503
+ "🛶": 248940,
504
+ "🛷": 248824,
505
+ "🛸": 248562,
506
+ "🛹": 248669,
507
+ "🛺": 248569,
508
+ "🛻": 249017,
509
+ "🛼": 248991,
510
+ "🟠": 249195,
511
+ "🟡": 248460,
512
+ "🟢": 248489,
513
+ "🟣": 248737,
514
+ "🟤": 248936,
515
+ "🟥": 249239,
516
+ "🟦": 248491,
517
+ "🟧": 249166,
518
+ "🟨": 248765,
519
+ "🟩": 248439,
520
+ "🟪": 248787,
521
+ "🟫": 249255,
522
+ "🟰": 248874,
523
+ "🤌": 248620,
524
+ "🤍": 248967,
525
+ "🤎": 249116,
526
+ "🤏": 248699,
527
+ "🤐": 248806,
528
+ "🤒": 248610,
529
+ "🤕": 248475,
530
+ "🤚": 249011,
531
+ "🤛": 249107,
532
+ "🤜": 249152,
533
+ "🤠": 248862,
534
+ "🤢": 249023,
535
+ "🤥": 248628,
536
+ "🤧": 249064,
537
+ "🤫": 248706,
538
+ "🤬": 249219,
539
+ "🤮": 248819,
540
+ "🤯": 249096,
541
+ "🤰": 249207,
542
+ "🤱": 248925,
543
+ "🤳": 248541,
544
+ "🤴": 248573,
545
+ "🤶": 249094,
546
+ "🤸": 248809,
547
+ "🤹": 248570,
548
+ "🤺": 248443,
549
+ "🤼": 249115,
550
+ "🤽": 248794,
551
+ "🤾": 248721,
552
+ "🤿": 249191,
553
+ "🥁": 249118,
554
+ "🥃": 248788,
555
+ "🥄": 248944,
556
+ "🥅": 249101,
557
+ "🥈": 248899,
558
+ "🥉": 248762,
559
+ "🥊": 248768,
560
+ "🥋": 248772,
561
+ "🥌": 249177,
562
+ "🥍": 249051,
563
+ "🥎": 248645,
564
+ "🥏": 248557,
565
+ "🥐": 249131,
566
+ "🥑": 248511,
567
+ "🥒": 249229,
568
+ "🥓": 248751,
569
+ "🥔": 248905,
570
+ "🥕": 248838,
571
+ "🥖": 248470,
572
+ "🥗": 248745,
573
+ "🥘": 248445,
574
+ "🥙": 248463,
575
+ "🥚": 248520,
576
+ "🥛": 248582,
577
+ "🥜": 249083,
578
+ "🥝": 248848,
579
+ "🥞": 248453,
580
+ "🥟": 249033,
581
+ "🥠": 248942,
582
+ "🥡": 248720,
583
+ "🥢": 249168,
584
+ "🥤": 248949,
585
+ "🥥": 248956,
586
+ "🥦": 248432,
587
+ "🥧": 248595,
588
+ "🥨": 248799,
589
+ "🥩": 248928,
590
+ "🥪": 248963,
591
+ "🥫": 248509,
592
+ "🥬": 249237,
593
+ "🥭": 248634,
594
+ "🥮": 248446,
595
+ "🥯": 248757,
596
+ "🥱": 249244,
597
+ "🥲": 248429,
598
+ "🥳": 249128,
599
+ "🥴": 248860,
600
+ "🥵": 249029,
601
+ "🥶": 249147,
602
+ "🥷": 248702,
603
+ "🥸": 248486,
604
+ "🥹": 248431,
605
+ "🥻": 249039,
606
+ "🥼": 248832,
607
+ "🥽": 248875,
608
+ "🥾": 249056,
609
+ "🥿": 249138,
610
+ "🦀": 249201,
611
+ "🦂": 249034,
612
+ "🦃": 248834,
613
+ "🦆": 249042,
614
+ "🦇": 248543,
615
+ "🦈": 249139,
616
+ "🦉": 249008,
617
+ "🦌": 248625,
618
+ "🦍": 248713,
619
+ "🦎": 248911,
620
+ "🦏": 248555,
621
+ "🦐": 248950,
622
+ "🦑": 248746,
623
+ "🦒": 248843,
624
+ "🦓": 248789,
625
+ "🦔": 248641,
626
+ "🦕": 248654,
627
+ "🦖": 248686,
628
+ "🦗": 249235,
629
+ "🦘": 248825,
630
+ "🦙": 248523,
631
+ "🦚": 248797,
632
+ "🦛": 249080,
633
+ "🦜": 248816,
634
+ "🦝": 248661,
635
+ "🦞": 249045,
636
+ "🦟": 248753,
637
+ "🦠": 249085,
638
+ "🦡": 249246,
639
+ "🦢": 248965,
640
+ "🦣": 248578,
641
+ "🦤": 248662,
642
+ "🦥": 248939,
643
+ "🦦": 249161,
644
+ "🦧": 249030,
645
+ "🦨": 248776,
646
+ "🦩": 248624,
647
+ "🦪": 248738,
648
+ "🦫": 248566,
649
+ "🦬": 249245,
650
+ "🦭": 249159,
651
+ "🦮": 248517,
652
+ "🦯": 249089,
653
+ "🦰": 249132,
654
+ "🦱": 248613,
655
+ "🦲": 249047,
656
+ "🦳": 248927,
657
+ "🦴": 248691,
658
+ "🦵": 248885,
659
+ "🦶": 248876,
660
+ "🦷": 248771,
661
+ "🦸": 248538,
662
+ "🦹": 248958,
663
+ "🦺": 248948,
664
+ "🦻": 248729,
665
+ "🦼": 248639,
666
+ "🦽": 248959,
667
+ "🦾": 248507,
668
+ "🦿": 249222,
669
+ "🧀": 248712,
670
+ "🧁": 248840,
671
+ "🧂": 249018,
672
+ "🧃": 249125,
673
+ "🧄": 248723,
674
+ "🧅": 248895,
675
+ "🧆": 248571,
676
+ "🧇": 248767,
677
+ "🧈": 249211,
678
+ "🧉": 248684,
679
+ "🧊": 249092,
680
+ "🧋": 248696,
681
+ "🧌": 249061,
682
+ "🧍": 249218,
683
+ "🧎": 248537,
684
+ "🧏": 248544,
685
+ "🧐": 249095,
686
+ "🧑": 248672,
687
+ "🧒": 248536,
688
+ "🧓": 249145,
689
+ "🧔": 249110,
690
+ "🧕": 249176,
691
+ "🧖": 249025,
692
+ "🧗": 248853,
693
+ "🧘": 248663,
694
+ "🧙": 248605,
695
+ "🧛": 248887,
696
+ "🧝": 248524,
697
+ "🧞": 248558,
698
+ "🧟": 249197,
699
+ "🧠": 248893,
700
+ "🧢": 249035,
701
+ "🧣": 249155,
702
+ "🧤": 248740,
703
+ "🧥": 248657,
704
+ "🧦": 248900,
705
+ "🧧": 248428,
706
+ "🧨": 248793,
707
+ "🧩": 249234,
708
+ "🧪": 249192,
709
+ "🧫": 248975,
710
+ "🧬": 248681,
711
+ "🧭": 249180,
712
+ "🧮": 249049,
713
+ "🧯": 249167,
714
+ "🧰": 248593,
715
+ "🧱": 249112,
716
+ "🧲": 249082,
717
+ "🧳": 249179,
718
+ "🧴": 249104,
719
+ "🧵": 249169,
720
+ "🧶": 248651,
721
+ "🧷": 248977,
722
+ "🧸": 248488,
723
+ "🧹": 249233,
724
+ "🧺": 248921,
725
+ "🧻": 249236,
726
+ "🧼": 248993,
727
+ "🧽": 248545,
728
+ "🧾": 248827,
729
+ "🧿": 249114,
730
+ "🩰": 248852,
731
+ "🩱": 248529,
732
+ "🩲": 249171,
733
+ "🩳": 248970,
734
+ "🩴": 248705,
735
+ "🩵": 248989,
736
+ "🩶": 249243,
737
+ "🩷": 248945,
738
+ "🩸": 248839,
739
+ "🩹": 248477,
740
+ "🩺": 248983,
741
+ "🩻": 248474,
742
+ "🩼": 249127,
743
+ "🪀": 249216,
744
+ "🪁": 249183,
745
+ "🪂": 248448,
746
+ "🪃": 248689,
747
+ "🪄": 248653,
748
+ "🪅": 248440,
749
+ "🪆": 248616,
750
+ "🪇": 249238,
751
+ "🪈": 249038,
752
+ "🪐": 249032,
753
+ "🪑": 249185,
754
+ "🪒": 248467,
755
+ "🪓": 249028,
756
+ "🪔": 248919,
757
+ "🪕": 248759,
758
+ "🪖": 248447,
759
+ "🪗": 249006,
760
+ "🪘": 248643,
761
+ "🪙": 248476,
762
+ "🪚": 249165,
763
+ "🪛": 249077,
764
+ "🪜": 248597,
765
+ "🪝": 248714,
766
+ "🪞": 248687,
767
+ "🪟": 249226,
768
+ "🪠": 248499,
769
+ "🪡": 248841,
770
+ "🪢": 249124,
771
+ "🪣": 248955,
772
+ "🪤": 248668,
773
+ "🪥": 248599,
774
+ "🪦": 248902,
775
+ "🪧": 248535,
776
+ "🪨": 248673,
777
+ "🪩": 248962,
778
+ "🪪": 249223,
779
+ "🪫": 248990,
780
+ "🪬": 248564,
781
+ "🪭": 248926,
782
+ "🪮": 248630,
783
+ "🪯": 248822,
784
+ "🪰": 248829,
785
+ "🪱": 249162,
786
+ "🪲": 248522,
787
+ "🪳": 249037,
788
+ "🪴": 248457,
789
+ "🪵": 248877,
790
+ "🪶": 249151,
791
+ "🪷": 249036,
792
+ "🪸": 248492,
793
+ "🪹": 248592,
794
+ "🪺": 248646,
795
+ "🪻": 248898,
796
+ "🪼": 248952,
797
+ "🪽": 249136,
798
+ "🪿": 249071,
799
+ "🫀": 248442,
800
+ "🫁": 248449,
801
+ "🫂": 248606,
802
+ "🫃": 248637,
803
+ "🫄": 248658,
804
+ "🫅": 248846,
805
+ "🫎": 248894,
806
+ "🫏": 248826,
807
+ "🫐": 248500,
808
+ "🫑": 248456,
809
+ "🫒": 248703,
810
+ "🫓": 248743,
811
+ "🫔": 249126,
812
+ "🫕": 249174,
813
+ "🫖": 248724,
814
+ "🫗": 248652,
815
+ "🫘": 249093,
816
+ "🫙": 249054,
817
+ "🫚": 248800,
818
+ "🫛": 248698,
819
+ "🫠": 249247,
820
+ "🫡": 248968,
821
+ "🫢": 248783,
822
+ "🫣": 248481,
823
+ "🫤": 248986,
824
+ "🫥": 249043,
825
+ "🫦": 248526,
826
+ "🫧": 248548,
827
+ "🫨": 248747,
828
+ "🫰": 249012,
829
+ "🫱": 249090,
830
+ "🫲": 248924,
831
+ "🫳": 249228,
832
+ "🫴": 248987,
833
+ "🫵": 248531,
834
+ "🫶": 249157,
835
+ "🫷": 249091,
836
+ "🫸": 248870
837
+ }
config.json ADDED
@@ -0,0 +1,40 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "clicknext/phayathaibert",
3
+ "architectures": [
4
+ "CamembertForSequenceClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "classifier_dropout": null,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "gelu",
11
+ "hidden_dropout_prob": 0.1,
12
+ "hidden_size": 768,
13
+ "id2label": {
14
+ "0": "LABEL_0",
15
+ "1": "LABEL_1",
16
+ "2": "LABEL_2",
17
+ "3": "LABEL_3"
18
+ },
19
+ "initializer_range": 0.02,
20
+ "intermediate_size": 3072,
21
+ "label2id": {
22
+ "LABEL_0": 0,
23
+ "LABEL_1": 1,
24
+ "LABEL_2": 2,
25
+ "LABEL_3": 3
26
+ },
27
+ "layer_norm_eps": 1e-12,
28
+ "max_position_embeddings": 512,
29
+ "model_type": "camembert",
30
+ "num_attention_heads": 12,
31
+ "num_hidden_layers": 12,
32
+ "pad_token_id": 1,
33
+ "position_embedding_type": "absolute",
34
+ "problem_type": "single_label_classification",
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.40.1",
37
+ "type_vocab_size": 1,
38
+ "use_cache": true,
39
+ "vocab_size": 249262
40
+ }
model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaf08ea07d6399123e24ccea22f9a6192783c65aff174f8e2135aedd03db6c52
3
+ size 1109931736
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:46272d55849f78c8946e719105ad77f106ab150686b803da3ac005850cdd8676
3
+ size 2219983098
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e0756100fa218e165a9a20bec4d70d745156140c6e5abfd5647935cea6c5bcb5
3
+ size 14244
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d432300d416e7d574db386599aa107407f0bd61bdd23f62c7d90a6b7c4ff1d1
3
+ size 1064
sentencepiece.bpe.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e295c936bc0d8b6669ae769a2f8a0363e6d3abcfd8d0869134aa1e903a447d26
3
+ size 5261686
special_tokens_map.json ADDED
@@ -0,0 +1,56 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<s>NOTUSED",
4
+ "</s>NOTUSED",
5
+ "<_>"
6
+ ],
7
+ "bos_token": {
8
+ "content": "<s>",
9
+ "lstrip": false,
10
+ "normalized": false,
11
+ "rstrip": false,
12
+ "single_word": false
13
+ },
14
+ "cls_token": {
15
+ "content": "<s>",
16
+ "lstrip": false,
17
+ "normalized": false,
18
+ "rstrip": false,
19
+ "single_word": false
20
+ },
21
+ "eos_token": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": false,
25
+ "rstrip": false,
26
+ "single_word": false
27
+ },
28
+ "mask_token": {
29
+ "content": "<mask>",
30
+ "lstrip": true,
31
+ "normalized": true,
32
+ "rstrip": false,
33
+ "single_word": false
34
+ },
35
+ "pad_token": {
36
+ "content": "<pad>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false
41
+ },
42
+ "sep_token": {
43
+ "content": "</s>",
44
+ "lstrip": false,
45
+ "normalized": false,
46
+ "rstrip": false,
47
+ "single_word": false
48
+ },
49
+ "unk_token": {
50
+ "content": "<unk>",
51
+ "lstrip": false,
52
+ "normalized": false,
53
+ "rstrip": false,
54
+ "single_word": false
55
+ }
56
+ }
tokenizer.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6bd9e947fbe9c970a202ea2a4dd511892b6b239078cf5919690e58d35a43e3f2
3
+ size 17349635
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff
 
trainer_state.json ADDED
@@ -0,0 +1,1429 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.762063227953411,
3
+ "best_model_checkpoint": "finetuned_models/wisesight_sentiment/checkpoint-2400",
4
+ "epoch": 5.991124260355029,
5
+ "eval_steps": 100,
6
+ "global_step": 8100,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07396449704142012,
13
+ "eval_class_f1": {
14
+ "neg": 0.018604651162790697,
15
+ "neu": 0.6997558991049634,
16
+ "pos": 0.0,
17
+ "q": 0.0
18
+ },
19
+ "eval_loss": 1.0810712575912476,
20
+ "eval_macro_average_f1": 0.17959013756693853,
21
+ "eval_micro_average_f1": 0.5391014975041597,
22
+ "eval_runtime": 6.8397,
23
+ "eval_samples_per_second": 351.476,
24
+ "eval_steps_per_second": 22.077,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 0.14792899408284024,
29
+ "eval_class_f1": {
30
+ "neg": 0.6223055295220243,
31
+ "neu": 0.7497702909647779,
32
+ "pos": 0.0,
33
+ "q": 0.0
34
+ },
35
+ "eval_loss": 0.8820463418960571,
36
+ "eval_macro_average_f1": 0.34301895512170055,
37
+ "eval_micro_average_f1": 0.6472545757071547,
38
+ "eval_runtime": 6.8712,
39
+ "eval_samples_per_second": 349.865,
40
+ "eval_steps_per_second": 21.976,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 0.22189349112426035,
45
+ "eval_class_f1": {
46
+ "neg": 0.7588757396449703,
47
+ "neu": 0.7731384829505916,
48
+ "pos": 0.27037037037037037,
49
+ "q": 0.0
50
+ },
51
+ "eval_loss": 0.7263810038566589,
52
+ "eval_macro_average_f1": 0.4505961482414831,
53
+ "eval_micro_average_f1": 0.7059068219633944,
54
+ "eval_runtime": 7.0136,
55
+ "eval_samples_per_second": 342.761,
56
+ "eval_steps_per_second": 21.529,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 0.2958579881656805,
61
+ "eval_class_f1": {
62
+ "neg": 0.7806637806637807,
63
+ "neu": 0.760541586073501,
64
+ "pos": 0.501891551071879,
65
+ "q": 0.0909090909090909
66
+ },
67
+ "eval_loss": 0.6896220445632935,
68
+ "eval_macro_average_f1": 0.5335015021795629,
69
+ "eval_micro_average_f1": 0.7175540765391015,
70
+ "eval_runtime": 7.0909,
71
+ "eval_samples_per_second": 339.026,
72
+ "eval_steps_per_second": 21.295,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 0.3698224852071006,
77
+ "grad_norm": 5.410265922546387,
78
+ "learning_rate": 1.828817733990148e-05,
79
+ "loss": 0.8994,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.3698224852071006,
84
+ "eval_class_f1": {
85
+ "neg": 0.7767988252569751,
86
+ "neu": 0.7759882869692534,
87
+ "pos": 0.41987179487179493,
88
+ "q": 0.3777777777777778
89
+ },
90
+ "eval_loss": 0.6673027276992798,
91
+ "eval_macro_average_f1": 0.5876091712189503,
92
+ "eval_micro_average_f1": 0.7225457570715474,
93
+ "eval_runtime": 7.071,
94
+ "eval_samples_per_second": 339.978,
95
+ "eval_steps_per_second": 21.355,
96
+ "step": 500
97
+ },
98
+ {
99
+ "epoch": 0.4437869822485207,
100
+ "eval_class_f1": {
101
+ "neg": 0.7704042715484364,
102
+ "neu": 0.7415287628053587,
103
+ "pos": 0.5426356589147286,
104
+ "q": 0.14285714285714285
105
+ },
106
+ "eval_loss": 0.6752218008041382,
107
+ "eval_macro_average_f1": 0.5493564590314166,
108
+ "eval_micro_average_f1": 0.7050748752079867,
109
+ "eval_runtime": 7.2386,
110
+ "eval_samples_per_second": 332.107,
111
+ "eval_steps_per_second": 20.86,
112
+ "step": 600
113
+ },
114
+ {
115
+ "epoch": 0.5177514792899408,
116
+ "eval_class_f1": {
117
+ "neg": 0.7780979827089337,
118
+ "neu": 0.7535296490520371,
119
+ "pos": 0.5532435740514076,
120
+ "q": 0.3870967741935484
121
+ },
122
+ "eval_loss": 0.667128324508667,
123
+ "eval_macro_average_f1": 0.6179919950014817,
124
+ "eval_micro_average_f1": 0.7171381031613977,
125
+ "eval_runtime": 7.3636,
126
+ "eval_samples_per_second": 326.472,
127
+ "eval_steps_per_second": 20.506,
128
+ "step": 700
129
+ },
130
+ {
131
+ "epoch": 0.591715976331361,
132
+ "eval_class_f1": {
133
+ "neg": 0.7555923777961889,
134
+ "neu": 0.7529501332318232,
135
+ "pos": 0.5357575757575757,
136
+ "q": 0.4161073825503356
137
+ },
138
+ "eval_loss": 0.662220299243927,
139
+ "eval_macro_average_f1": 0.6151018673339809,
140
+ "eval_micro_average_f1": 0.7059068219633944,
141
+ "eval_runtime": 7.3848,
142
+ "eval_samples_per_second": 325.535,
143
+ "eval_steps_per_second": 20.448,
144
+ "step": 800
145
+ },
146
+ {
147
+ "epoch": 0.665680473372781,
148
+ "eval_class_f1": {
149
+ "neg": 0.7756714060031595,
150
+ "neu": 0.7722698471859858,
151
+ "pos": 0.5253807106598984,
152
+ "q": 0.2535211267605634
153
+ },
154
+ "eval_loss": 0.6270455121994019,
155
+ "eval_macro_average_f1": 0.5817107726524018,
156
+ "eval_micro_average_f1": 0.7250415973377704,
157
+ "eval_runtime": 7.3625,
158
+ "eval_samples_per_second": 326.52,
159
+ "eval_steps_per_second": 20.509,
160
+ "step": 900
161
+ },
162
+ {
163
+ "epoch": 0.7396449704142012,
164
+ "grad_norm": 7.580224514007568,
165
+ "learning_rate": 2.9252054794520548e-05,
166
+ "loss": 0.6495,
167
+ "step": 1000
168
+ },
169
+ {
170
+ "epoch": 0.7396449704142012,
171
+ "eval_class_f1": {
172
+ "neg": 0.7862993298585256,
173
+ "neu": 0.7885968159940763,
174
+ "pos": 0.5219858156028369,
175
+ "q": 0.30508474576271183
176
+ },
177
+ "eval_loss": 0.6415818929672241,
178
+ "eval_macro_average_f1": 0.6004916768045376,
179
+ "eval_micro_average_f1": 0.742928452579035,
180
+ "eval_runtime": 7.3324,
181
+ "eval_samples_per_second": 327.861,
182
+ "eval_steps_per_second": 20.594,
183
+ "step": 1000
184
+ },
185
+ {
186
+ "epoch": 0.8136094674556213,
187
+ "eval_class_f1": {
188
+ "neg": 0.7824267782426777,
189
+ "neu": 0.7750972762645915,
190
+ "pos": 0.550531914893617,
191
+ "q": 0.1923076923076923
192
+ },
193
+ "eval_loss": 0.6599770188331604,
194
+ "eval_macro_average_f1": 0.5750909154271446,
195
+ "eval_micro_average_f1": 0.7358569051580699,
196
+ "eval_runtime": 7.4168,
197
+ "eval_samples_per_second": 324.129,
198
+ "eval_steps_per_second": 20.359,
199
+ "step": 1100
200
+ },
201
+ {
202
+ "epoch": 0.8875739644970414,
203
+ "eval_class_f1": {
204
+ "neg": 0.7531806615776081,
205
+ "neu": 0.7700414000752727,
206
+ "pos": 0.5685164212910533,
207
+ "q": 0.35955056179775274
208
+ },
209
+ "eval_loss": 0.6348879933357239,
210
+ "eval_macro_average_f1": 0.6128222611854217,
211
+ "eval_micro_average_f1": 0.721297836938436,
212
+ "eval_runtime": 7.4408,
213
+ "eval_samples_per_second": 323.084,
214
+ "eval_steps_per_second": 20.294,
215
+ "step": 1200
216
+ },
217
+ {
218
+ "epoch": 0.9615384615384616,
219
+ "eval_class_f1": {
220
+ "neg": 0.7920646583394563,
221
+ "neu": 0.783076923076923,
222
+ "pos": 0.535014005602241,
223
+ "q": 0.46616541353383456
224
+ },
225
+ "eval_loss": 0.6110679507255554,
226
+ "eval_macro_average_f1": 0.6440802501381137,
227
+ "eval_micro_average_f1": 0.7400166389351082,
228
+ "eval_runtime": 7.4775,
229
+ "eval_samples_per_second": 321.498,
230
+ "eval_steps_per_second": 20.194,
231
+ "step": 1300
232
+ },
233
+ {
234
+ "epoch": 1.0355029585798816,
235
+ "eval_class_f1": {
236
+ "neg": 0.8015094339622642,
237
+ "neu": 0.8065934065934067,
238
+ "pos": 0.5252225519287834,
239
+ "q": 0.379746835443038
240
+ },
241
+ "eval_loss": 0.6416576504707336,
242
+ "eval_macro_average_f1": 0.628268056981873,
243
+ "eval_micro_average_f1": 0.7587354409317804,
244
+ "eval_runtime": 7.358,
245
+ "eval_samples_per_second": 326.721,
246
+ "eval_steps_per_second": 20.522,
247
+ "step": 1400
248
+ },
249
+ {
250
+ "epoch": 1.1094674556213018,
251
+ "grad_norm": 3.8226146697998047,
252
+ "learning_rate": 2.72013698630137e-05,
253
+ "loss": 0.6084,
254
+ "step": 1500
255
+ },
256
+ {
257
+ "epoch": 1.1094674556213018,
258
+ "eval_class_f1": {
259
+ "neg": 0.7846277021617293,
260
+ "neu": 0.8024917552216929,
261
+ "pos": 0.5813953488372093,
262
+ "q": 0.2857142857142857
263
+ },
264
+ "eval_loss": 0.6498740315437317,
265
+ "eval_macro_average_f1": 0.6135572729837293,
266
+ "eval_micro_average_f1": 0.7562396006655574,
267
+ "eval_runtime": 7.2975,
268
+ "eval_samples_per_second": 329.428,
269
+ "eval_steps_per_second": 20.692,
270
+ "step": 1500
271
+ },
272
+ {
273
+ "epoch": 1.183431952662722,
274
+ "eval_class_f1": {
275
+ "neg": 0.7917525773195877,
276
+ "neu": 0.7863247863247863,
277
+ "pos": 0.5444126074498568,
278
+ "q": 0.345679012345679
279
+ },
280
+ "eval_loss": 0.685055673122406,
281
+ "eval_macro_average_f1": 0.6170422458599774,
282
+ "eval_micro_average_f1": 0.7454242928452579,
283
+ "eval_runtime": 7.1996,
284
+ "eval_samples_per_second": 333.908,
285
+ "eval_steps_per_second": 20.973,
286
+ "step": 1600
287
+ },
288
+ {
289
+ "epoch": 1.2573964497041419,
290
+ "eval_class_f1": {
291
+ "neg": 0.7966231772831925,
292
+ "neu": 0.7783018867924529,
293
+ "pos": 0.5695216907675196,
294
+ "q": 0.25806451612903225
295
+ },
296
+ "eval_loss": 0.6685267090797424,
297
+ "eval_macro_average_f1": 0.6006278177430493,
298
+ "eval_micro_average_f1": 0.7375207986688852,
299
+ "eval_runtime": 7.2849,
300
+ "eval_samples_per_second": 329.999,
301
+ "eval_steps_per_second": 20.728,
302
+ "step": 1700
303
+ },
304
+ {
305
+ "epoch": 1.331360946745562,
306
+ "eval_class_f1": {
307
+ "neg": 0.8018362662586075,
308
+ "neu": 0.7977570093457944,
309
+ "pos": 0.5578947368421052,
310
+ "q": 0.36363636363636365
311
+ },
312
+ "eval_loss": 0.6347253918647766,
313
+ "eval_macro_average_f1": 0.6302810940207177,
314
+ "eval_micro_average_f1": 0.7549916805324459,
315
+ "eval_runtime": 7.1263,
316
+ "eval_samples_per_second": 337.343,
317
+ "eval_steps_per_second": 21.189,
318
+ "step": 1800
319
+ },
320
+ {
321
+ "epoch": 1.4053254437869822,
322
+ "eval_class_f1": {
323
+ "neg": 0.7917329093799682,
324
+ "neu": 0.7925512104283055,
325
+ "pos": 0.5742821473158551,
326
+ "q": 0.28125
327
+ },
328
+ "eval_loss": 0.6284430027008057,
329
+ "eval_macro_average_f1": 0.6099540667810323,
330
+ "eval_micro_average_f1": 0.7491680532445923,
331
+ "eval_runtime": 7.2447,
332
+ "eval_samples_per_second": 331.831,
333
+ "eval_steps_per_second": 20.843,
334
+ "step": 1900
335
+ },
336
+ {
337
+ "epoch": 1.4792899408284024,
338
+ "grad_norm": 4.046507835388184,
339
+ "learning_rate": 2.5146575342465757e-05,
340
+ "loss": 0.5135,
341
+ "step": 2000
342
+ },
343
+ {
344
+ "epoch": 1.4792899408284024,
345
+ "eval_class_f1": {
346
+ "neg": 0.7920792079207921,
347
+ "neu": 0.7867370007535796,
348
+ "pos": 0.5517241379310345,
349
+ "q": 0.27586206896551724
350
+ },
351
+ "eval_loss": 0.6431812644004822,
352
+ "eval_macro_average_f1": 0.601600603892731,
353
+ "eval_micro_average_f1": 0.7437603993344426,
354
+ "eval_runtime": 7.2356,
355
+ "eval_samples_per_second": 332.247,
356
+ "eval_steps_per_second": 20.869,
357
+ "step": 2000
358
+ },
359
+ {
360
+ "epoch": 1.5532544378698225,
361
+ "eval_class_f1": {
362
+ "neg": 0.7887550200803214,
363
+ "neu": 0.7950581395348836,
364
+ "pos": 0.555407209612817,
365
+ "q": 0.29032258064516125
366
+ },
367
+ "eval_loss": 0.6327183842658997,
368
+ "eval_macro_average_f1": 0.6073857374682958,
369
+ "eval_micro_average_f1": 0.7495840266222962,
370
+ "eval_runtime": 7.3333,
371
+ "eval_samples_per_second": 327.82,
372
+ "eval_steps_per_second": 20.591,
373
+ "step": 2100
374
+ },
375
+ {
376
+ "epoch": 1.6272189349112427,
377
+ "eval_class_f1": {
378
+ "neg": 0.7658119658119659,
379
+ "neu": 0.8002847988608045,
380
+ "pos": 0.56951871657754,
381
+ "q": 0.4197530864197531
382
+ },
383
+ "eval_loss": 0.6534045338630676,
384
+ "eval_macro_average_f1": 0.6388421419175159,
385
+ "eval_micro_average_f1": 0.7495840266222962,
386
+ "eval_runtime": 7.2578,
387
+ "eval_samples_per_second": 331.231,
388
+ "eval_steps_per_second": 20.805,
389
+ "step": 2200
390
+ },
391
+ {
392
+ "epoch": 1.7011834319526629,
393
+ "eval_class_f1": {
394
+ "neg": 0.7832369942196531,
395
+ "neu": 0.7650099403578529,
396
+ "pos": 0.5829268292682928,
397
+ "q": 0.35955056179775274
398
+ },
399
+ "eval_loss": 0.6581071019172668,
400
+ "eval_macro_average_f1": 0.6226810814108878,
401
+ "eval_micro_average_f1": 0.7316971713810316,
402
+ "eval_runtime": 7.1824,
403
+ "eval_samples_per_second": 334.708,
404
+ "eval_steps_per_second": 21.024,
405
+ "step": 2300
406
+ },
407
+ {
408
+ "epoch": 1.7751479289940828,
409
+ "eval_class_f1": {
410
+ "neg": 0.7951807228915662,
411
+ "neu": 0.8109843081312411,
412
+ "pos": 0.5441595441595442,
413
+ "q": 0.3157894736842105
414
+ },
415
+ "eval_loss": 0.6206311583518982,
416
+ "eval_macro_average_f1": 0.6165285122166405,
417
+ "eval_micro_average_f1": 0.762063227953411,
418
+ "eval_runtime": 7.2501,
419
+ "eval_samples_per_second": 331.583,
420
+ "eval_steps_per_second": 20.827,
421
+ "step": 2400
422
+ },
423
+ {
424
+ "epoch": 1.849112426035503,
425
+ "grad_norm": 6.195135593414307,
426
+ "learning_rate": 2.3091780821917807e-05,
427
+ "loss": 0.4995,
428
+ "step": 2500
429
+ },
430
+ {
431
+ "epoch": 1.849112426035503,
432
+ "eval_class_f1": {
433
+ "neg": 0.7932148626817447,
434
+ "neu": 0.8,
435
+ "pos": 0.5830164765525983,
436
+ "q": 0.196078431372549
437
+ },
438
+ "eval_loss": 0.6029447913169861,
439
+ "eval_macro_average_f1": 0.5930774426517229,
440
+ "eval_micro_average_f1": 0.7562396006655574,
441
+ "eval_runtime": 7.1935,
442
+ "eval_samples_per_second": 334.192,
443
+ "eval_steps_per_second": 20.991,
444
+ "step": 2500
445
+ },
446
+ {
447
+ "epoch": 1.9230769230769231,
448
+ "eval_class_f1": {
449
+ "neg": 0.8059236165237724,
450
+ "neu": 0.7899159663865546,
451
+ "pos": 0.579415501905972,
452
+ "q": 0.36666666666666664
453
+ },
454
+ "eval_loss": 0.6066814064979553,
455
+ "eval_macro_average_f1": 0.6354804378707414,
456
+ "eval_micro_average_f1": 0.7491680532445923,
457
+ "eval_runtime": 7.2817,
458
+ "eval_samples_per_second": 330.143,
459
+ "eval_steps_per_second": 20.737,
460
+ "step": 2600
461
+ },
462
+ {
463
+ "epoch": 1.997041420118343,
464
+ "eval_class_f1": {
465
+ "neg": 0.8003157063930545,
466
+ "neu": 0.797884397431054,
467
+ "pos": 0.5773447015834348,
468
+ "q": 0.3835616438356164
469
+ },
470
+ "eval_loss": 0.630171537399292,
471
+ "eval_macro_average_f1": 0.63977661231079,
472
+ "eval_micro_average_f1": 0.7545757071547421,
473
+ "eval_runtime": 7.2376,
474
+ "eval_samples_per_second": 332.156,
475
+ "eval_steps_per_second": 20.863,
476
+ "step": 2700
477
+ },
478
+ {
479
+ "epoch": 2.0710059171597632,
480
+ "eval_class_f1": {
481
+ "neg": 0.7848509266720386,
482
+ "neu": 0.7945103857566765,
483
+ "pos": 0.5853051058530511,
484
+ "q": 0.35294117647058826
485
+ },
486
+ "eval_loss": 0.7064331769943237,
487
+ "eval_macro_average_f1": 0.6294018986880886,
488
+ "eval_micro_average_f1": 0.7508319467554077,
489
+ "eval_runtime": 7.2934,
490
+ "eval_samples_per_second": 329.612,
491
+ "eval_steps_per_second": 20.704,
492
+ "step": 2800
493
+ },
494
+ {
495
+ "epoch": 2.1449704142011834,
496
+ "eval_class_f1": {
497
+ "neg": 0.797752808988764,
498
+ "neu": 0.8026412325752018,
499
+ "pos": 0.5824742268041238,
500
+ "q": 0.26666666666666666
501
+ },
502
+ "eval_loss": 0.7201129794120789,
503
+ "eval_macro_average_f1": 0.612383733758689,
504
+ "eval_micro_average_f1": 0.7591514143094842,
505
+ "eval_runtime": 7.1883,
506
+ "eval_samples_per_second": 334.433,
507
+ "eval_steps_per_second": 21.006,
508
+ "step": 2900
509
+ },
510
+ {
511
+ "epoch": 2.2189349112426036,
512
+ "grad_norm": 6.065237045288086,
513
+ "learning_rate": 2.1036986301369864e-05,
514
+ "loss": 0.4003,
515
+ "step": 3000
516
+ },
517
+ {
518
+ "epoch": 2.2189349112426036,
519
+ "eval_class_f1": {
520
+ "neg": 0.7861271676300579,
521
+ "neu": 0.800578034682081,
522
+ "pos": 0.5670391061452514,
523
+ "q": 0.3185840707964602
524
+ },
525
+ "eval_loss": 0.7178497910499573,
526
+ "eval_macro_average_f1": 0.6180820948134627,
527
+ "eval_micro_average_f1": 0.7508319467554077,
528
+ "eval_runtime": 7.2106,
529
+ "eval_samples_per_second": 333.398,
530
+ "eval_steps_per_second": 20.941,
531
+ "step": 3000
532
+ },
533
+ {
534
+ "epoch": 2.2928994082840237,
535
+ "eval_class_f1": {
536
+ "neg": 0.7955801104972374,
537
+ "neu": 0.781854043392505,
538
+ "pos": 0.5852585258525853,
539
+ "q": 0.28865979381443296
540
+ },
541
+ "eval_loss": 0.7727176547050476,
542
+ "eval_macro_average_f1": 0.6128381183891901,
543
+ "eval_micro_average_f1": 0.7383527454242929,
544
+ "eval_runtime": 7.2299,
545
+ "eval_samples_per_second": 332.51,
546
+ "eval_steps_per_second": 20.886,
547
+ "step": 3100
548
+ },
549
+ {
550
+ "epoch": 2.366863905325444,
551
+ "eval_class_f1": {
552
+ "neg": 0.7893462469733656,
553
+ "neu": 0.788983997022702,
554
+ "pos": 0.5606060606060607,
555
+ "q": 0.2888888888888889
556
+ },
557
+ "eval_loss": 0.7219040393829346,
558
+ "eval_macro_average_f1": 0.6069562983727543,
559
+ "eval_micro_average_f1": 0.7420965058236273,
560
+ "eval_runtime": 7.2669,
561
+ "eval_samples_per_second": 330.815,
562
+ "eval_steps_per_second": 20.779,
563
+ "step": 3200
564
+ },
565
+ {
566
+ "epoch": 2.440828402366864,
567
+ "eval_class_f1": {
568
+ "neg": 0.8073115003808072,
569
+ "neu": 0.7814829344841114,
570
+ "pos": 0.5855338691159586,
571
+ "q": 0.26666666666666666
572
+ },
573
+ "eval_loss": 0.7229210734367371,
574
+ "eval_macro_average_f1": 0.610248742661886,
575
+ "eval_micro_average_f1": 0.7450083194675541,
576
+ "eval_runtime": 7.1283,
577
+ "eval_samples_per_second": 337.248,
578
+ "eval_steps_per_second": 21.183,
579
+ "step": 3300
580
+ },
581
+ {
582
+ "epoch": 2.5147928994082838,
583
+ "eval_class_f1": {
584
+ "neg": 0.7984790874524715,
585
+ "neu": 0.7856049004594182,
586
+ "pos": 0.5773447015834348,
587
+ "q": 0.3
588
+ },
589
+ "eval_loss": 0.7037935853004456,
590
+ "eval_macro_average_f1": 0.615357172373831,
591
+ "eval_micro_average_f1": 0.747504159733777,
592
+ "eval_runtime": 7.2219,
593
+ "eval_samples_per_second": 332.876,
594
+ "eval_steps_per_second": 20.909,
595
+ "step": 3400
596
+ },
597
+ {
598
+ "epoch": 2.5887573964497044,
599
+ "grad_norm": 3.8475677967071533,
600
+ "learning_rate": 1.8982191780821918e-05,
601
+ "loss": 0.3579,
602
+ "step": 3500
603
+ },
604
+ {
605
+ "epoch": 2.5887573964497044,
606
+ "eval_class_f1": {
607
+ "neg": 0.7871815940838127,
608
+ "neu": 0.7871305649083427,
609
+ "pos": 0.5738916256157636,
610
+ "q": 0.37735849056603776
611
+ },
612
+ "eval_loss": 0.7569752931594849,
613
+ "eval_macro_average_f1": 0.6313905687934891,
614
+ "eval_micro_average_f1": 0.7420965058236273,
615
+ "eval_runtime": 7.3391,
616
+ "eval_samples_per_second": 327.56,
617
+ "eval_steps_per_second": 20.575,
618
+ "step": 3500
619
+ },
620
+ {
621
+ "epoch": 2.662721893491124,
622
+ "eval_class_f1": {
623
+ "neg": 0.8064269319051262,
624
+ "neu": 0.7905718701700155,
625
+ "pos": 0.5779927448609432,
626
+ "q": 0.3255813953488372
627
+ },
628
+ "eval_loss": 0.7201011180877686,
629
+ "eval_macro_average_f1": 0.6251432355712305,
630
+ "eval_micro_average_f1": 0.75,
631
+ "eval_runtime": 7.2188,
632
+ "eval_samples_per_second": 333.02,
633
+ "eval_steps_per_second": 20.918,
634
+ "step": 3600
635
+ },
636
+ {
637
+ "epoch": 2.7366863905325443,
638
+ "eval_class_f1": {
639
+ "neg": 0.7847896440129449,
640
+ "neu": 0.7701911822083495,
641
+ "pos": 0.5797413793103448,
642
+ "q": 0.3703703703703704
643
+ },
644
+ "eval_loss": 0.7302864789962769,
645
+ "eval_macro_average_f1": 0.6262731439755023,
646
+ "eval_micro_average_f1": 0.7304492512479202,
647
+ "eval_runtime": 7.2541,
648
+ "eval_samples_per_second": 331.401,
649
+ "eval_steps_per_second": 20.816,
650
+ "step": 3700
651
+ },
652
+ {
653
+ "epoch": 2.8106508875739644,
654
+ "eval_class_f1": {
655
+ "neg": 0.7971698113207547,
656
+ "neu": 0.8014842300556586,
657
+ "pos": 0.5839793281653747,
658
+ "q": 0.3283582089552239
659
+ },
660
+ "eval_loss": 0.7112248539924622,
661
+ "eval_macro_average_f1": 0.627747894624253,
662
+ "eval_micro_average_f1": 0.7587354409317804,
663
+ "eval_runtime": 7.2905,
664
+ "eval_samples_per_second": 329.743,
665
+ "eval_steps_per_second": 20.712,
666
+ "step": 3800
667
+ },
668
+ {
669
+ "epoch": 2.8846153846153846,
670
+ "eval_class_f1": {
671
+ "neg": 0.7999999999999999,
672
+ "neu": 0.7944066515495087,
673
+ "pos": 0.5961995249406176,
674
+ "q": 0.2545454545454545
675
+ },
676
+ "eval_loss": 0.7105884552001953,
677
+ "eval_macro_average_f1": 0.6112879077588952,
678
+ "eval_micro_average_f1": 0.7549916805324459,
679
+ "eval_runtime": 7.4167,
680
+ "eval_samples_per_second": 324.132,
681
+ "eval_steps_per_second": 20.359,
682
+ "step": 3900
683
+ },
684
+ {
685
+ "epoch": 2.9585798816568047,
686
+ "grad_norm": 8.97050666809082,
687
+ "learning_rate": 1.6927397260273975e-05,
688
+ "loss": 0.3409,
689
+ "step": 4000
690
+ },
691
+ {
692
+ "epoch": 2.9585798816568047,
693
+ "eval_class_f1": {
694
+ "neg": 0.803088803088803,
695
+ "neu": 0.7901328273244782,
696
+ "pos": 0.5671641791044775,
697
+ "q": 0.3513513513513513
698
+ },
699
+ "eval_loss": 0.7364293932914734,
700
+ "eval_macro_average_f1": 0.6279342902172774,
701
+ "eval_micro_average_f1": 0.7495840266222962,
702
+ "eval_runtime": 7.2488,
703
+ "eval_samples_per_second": 331.641,
704
+ "eval_steps_per_second": 20.831,
705
+ "step": 4000
706
+ },
707
+ {
708
+ "epoch": 3.032544378698225,
709
+ "eval_class_f1": {
710
+ "neg": 0.7924836601307189,
711
+ "neu": 0.7892777364110202,
712
+ "pos": 0.5696969696969698,
713
+ "q": 0.3287671232876712
714
+ },
715
+ "eval_loss": 0.8425710201263428,
716
+ "eval_macro_average_f1": 0.6200563723815951,
717
+ "eval_micro_average_f1": 0.7454242928452579,
718
+ "eval_runtime": 7.1671,
719
+ "eval_samples_per_second": 335.422,
720
+ "eval_steps_per_second": 21.068,
721
+ "step": 4100
722
+ },
723
+ {
724
+ "epoch": 3.106508875739645,
725
+ "eval_class_f1": {
726
+ "neg": 0.7883817427385893,
727
+ "neu": 0.7684537684537684,
728
+ "pos": 0.5720338983050848,
729
+ "q": 0.35294117647058826
730
+ },
731
+ "eval_loss": 0.9264113306999207,
732
+ "eval_macro_average_f1": 0.6204526464920077,
733
+ "eval_micro_average_f1": 0.7275374376039934,
734
+ "eval_runtime": 7.2924,
735
+ "eval_samples_per_second": 329.656,
736
+ "eval_steps_per_second": 20.706,
737
+ "step": 4200
738
+ },
739
+ {
740
+ "epoch": 3.1804733727810652,
741
+ "eval_class_f1": {
742
+ "neg": 0.8064269319051262,
743
+ "neu": 0.7787333854573885,
744
+ "pos": 0.5774647887323944,
745
+ "q": 0.32967032967032966
746
+ },
747
+ "eval_loss": 0.9222328662872314,
748
+ "eval_macro_average_f1": 0.6230738589413097,
749
+ "eval_micro_average_f1": 0.7420965058236273,
750
+ "eval_runtime": 7.2012,
751
+ "eval_samples_per_second": 333.833,
752
+ "eval_steps_per_second": 20.969,
753
+ "step": 4300
754
+ },
755
+ {
756
+ "epoch": 3.2544378698224854,
757
+ "eval_class_f1": {
758
+ "neg": 0.7999999999999999,
759
+ "neu": 0.7803557617942769,
760
+ "pos": 0.5765124555160142,
761
+ "q": 0.35955056179775274
762
+ },
763
+ "eval_loss": 0.9496058821678162,
764
+ "eval_macro_average_f1": 0.6291046947770109,
765
+ "eval_micro_average_f1": 0.7420965058236273,
766
+ "eval_runtime": 7.3334,
767
+ "eval_samples_per_second": 327.814,
768
+ "eval_steps_per_second": 20.591,
769
+ "step": 4400
770
+ },
771
+ {
772
+ "epoch": 3.328402366863905,
773
+ "grad_norm": 12.435276985168457,
774
+ "learning_rate": 1.4872602739726027e-05,
775
+ "loss": 0.2249,
776
+ "step": 4500
777
+ },
778
+ {
779
+ "epoch": 3.328402366863905,
780
+ "eval_class_f1": {
781
+ "neg": 0.8012718600953895,
782
+ "neu": 0.784238714613619,
783
+ "pos": 0.5663082437275986,
784
+ "q": 0.32323232323232326
785
+ },
786
+ "eval_loss": 0.9026820063591003,
787
+ "eval_macro_average_f1": 0.6187627854172325,
788
+ "eval_micro_average_f1": 0.7412645590682196,
789
+ "eval_runtime": 7.1404,
790
+ "eval_samples_per_second": 336.677,
791
+ "eval_steps_per_second": 21.147,
792
+ "step": 4500
793
+ },
794
+ {
795
+ "epoch": 3.4023668639053253,
796
+ "eval_class_f1": {
797
+ "neg": 0.8043647700701482,
798
+ "neu": 0.7884322678843227,
799
+ "pos": 0.5676328502415459,
800
+ "q": 0.2898550724637681
801
+ },
802
+ "eval_loss": 0.943065345287323,
803
+ "eval_macro_average_f1": 0.6125712401649462,
804
+ "eval_micro_average_f1": 0.747504159733777,
805
+ "eval_runtime": 7.2681,
806
+ "eval_samples_per_second": 330.759,
807
+ "eval_steps_per_second": 20.776,
808
+ "step": 4600
809
+ },
810
+ {
811
+ "epoch": 3.4763313609467454,
812
+ "eval_class_f1": {
813
+ "neg": 0.8018504240555128,
814
+ "neu": 0.7930382141505864,
815
+ "pos": 0.5692503176620076,
816
+ "q": 0.345679012345679
817
+ },
818
+ "eval_loss": 0.9825762510299683,
819
+ "eval_macro_average_f1": 0.6274544920534464,
820
+ "eval_micro_average_f1": 0.7512479201331115,
821
+ "eval_runtime": 7.2921,
822
+ "eval_samples_per_second": 329.672,
823
+ "eval_steps_per_second": 20.707,
824
+ "step": 4700
825
+ },
826
+ {
827
+ "epoch": 3.5502958579881656,
828
+ "eval_class_f1": {
829
+ "neg": 0.7946498819826908,
830
+ "neu": 0.7813455657492355,
831
+ "pos": 0.5795053003533569,
832
+ "q": 0.3055555555555555
833
+ },
834
+ "eval_loss": 0.9374552965164185,
835
+ "eval_macro_average_f1": 0.6152640759102097,
836
+ "eval_micro_average_f1": 0.7420965058236273,
837
+ "eval_runtime": 7.387,
838
+ "eval_samples_per_second": 325.436,
839
+ "eval_steps_per_second": 20.441,
840
+ "step": 4800
841
+ },
842
+ {
843
+ "epoch": 3.6242603550295858,
844
+ "eval_class_f1": {
845
+ "neg": 0.7984790874524715,
846
+ "neu": 0.7785547785547785,
847
+ "pos": 0.5748218527315915,
848
+ "q": 0.3116883116883117
849
+ },
850
+ "eval_loss": 0.9656402468681335,
851
+ "eval_macro_average_f1": 0.6158860076067884,
852
+ "eval_micro_average_f1": 0.7408485856905158,
853
+ "eval_runtime": 7.4821,
854
+ "eval_samples_per_second": 321.299,
855
+ "eval_steps_per_second": 20.181,
856
+ "step": 4900
857
+ },
858
+ {
859
+ "epoch": 3.698224852071006,
860
+ "grad_norm": 0.6623280644416809,
861
+ "learning_rate": 1.2817808219178083e-05,
862
+ "loss": 0.2207,
863
+ "step": 5000
864
+ },
865
+ {
866
+ "epoch": 3.698224852071006,
867
+ "eval_class_f1": {
868
+ "neg": 0.7987616099071206,
869
+ "neu": 0.7862857142857143,
870
+ "pos": 0.5647348951911221,
871
+ "q": 0.3
872
+ },
873
+ "eval_loss": 0.9422620534896851,
874
+ "eval_macro_average_f1": 0.6124455548459892,
875
+ "eval_micro_average_f1": 0.7441763727121464,
876
+ "eval_runtime": 7.2765,
877
+ "eval_samples_per_second": 330.379,
878
+ "eval_steps_per_second": 20.752,
879
+ "step": 5000
880
+ },
881
+ {
882
+ "epoch": 3.772189349112426,
883
+ "eval_class_f1": {
884
+ "neg": 0.8,
885
+ "neu": 0.7671342685370742,
886
+ "pos": 0.5714285714285714,
887
+ "q": 0.3414634146341463
888
+ },
889
+ "eval_loss": 0.9625053405761719,
890
+ "eval_macro_average_f1": 0.620006563649948,
891
+ "eval_micro_average_f1": 0.7296173044925125,
892
+ "eval_runtime": 7.3445,
893
+ "eval_samples_per_second": 327.318,
894
+ "eval_steps_per_second": 20.559,
895
+ "step": 5100
896
+ },
897
+ {
898
+ "epoch": 3.8461538461538463,
899
+ "eval_class_f1": {
900
+ "neg": 0.8018942383583267,
901
+ "neu": 0.7968691762951919,
902
+ "pos": 0.5706874189364461,
903
+ "q": 0.25287356321839083
904
+ },
905
+ "eval_loss": 0.9822611212730408,
906
+ "eval_macro_average_f1": 0.6055810992020889,
907
+ "eval_micro_average_f1": 0.7520798668885191,
908
+ "eval_runtime": 7.4499,
909
+ "eval_samples_per_second": 322.688,
910
+ "eval_steps_per_second": 20.269,
911
+ "step": 5200
912
+ },
913
+ {
914
+ "epoch": 3.9201183431952664,
915
+ "eval_class_f1": {
916
+ "neg": 0.7891268533772653,
917
+ "neu": 0.7875375375375375,
918
+ "pos": 0.5810968494749125,
919
+ "q": 0.273972602739726
920
+ },
921
+ "eval_loss": 0.9442653656005859,
922
+ "eval_macro_average_f1": 0.6079334607823603,
923
+ "eval_micro_average_f1": 0.7433444259567388,
924
+ "eval_runtime": 7.3731,
925
+ "eval_samples_per_second": 326.052,
926
+ "eval_steps_per_second": 20.48,
927
+ "step": 5300
928
+ },
929
+ {
930
+ "epoch": 3.994082840236686,
931
+ "eval_class_f1": {
932
+ "neg": 0.8046511627906977,
933
+ "neu": 0.7945516458569808,
934
+ "pos": 0.5775,
935
+ "q": 0.32
936
+ },
937
+ "eval_loss": 0.9429491758346558,
938
+ "eval_macro_average_f1": 0.6241757021619195,
939
+ "eval_micro_average_f1": 0.7537437603993344,
940
+ "eval_runtime": 7.3966,
941
+ "eval_samples_per_second": 325.013,
942
+ "eval_steps_per_second": 20.415,
943
+ "step": 5400
944
+ },
945
+ {
946
+ "epoch": 4.068047337278107,
947
+ "grad_norm": 2.4124114513397217,
948
+ "learning_rate": 1.0763013698630138e-05,
949
+ "loss": 0.2077,
950
+ "step": 5500
951
+ },
952
+ {
953
+ "epoch": 4.068047337278107,
954
+ "eval_class_f1": {
955
+ "neg": 0.8063781321184511,
956
+ "neu": 0.7866927592954991,
957
+ "pos": 0.5862884160756501,
958
+ "q": 0.3333333333333333
959
+ },
960
+ "eval_loss": 1.1077452898025513,
961
+ "eval_macro_average_f1": 0.6281731602057334,
962
+ "eval_micro_average_f1": 0.7483361064891847,
963
+ "eval_runtime": 7.369,
964
+ "eval_samples_per_second": 326.23,
965
+ "eval_steps_per_second": 20.491,
966
+ "step": 5500
967
+ },
968
+ {
969
+ "epoch": 4.1420118343195265,
970
+ "eval_class_f1": {
971
+ "neg": 0.7993920972644377,
972
+ "neu": 0.7660256410256411,
973
+ "pos": 0.5726775956284154,
974
+ "q": 0.29629629629629634
975
+ },
976
+ "eval_loss": 1.1472598314285278,
977
+ "eval_macro_average_f1": 0.6085979075536977,
978
+ "eval_micro_average_f1": 0.7304492512479202,
979
+ "eval_runtime": 7.5033,
980
+ "eval_samples_per_second": 320.394,
981
+ "eval_steps_per_second": 20.125,
982
+ "step": 5600
983
+ },
984
+ {
985
+ "epoch": 4.215976331360947,
986
+ "eval_class_f1": {
987
+ "neg": 0.8024502297090352,
988
+ "neu": 0.7868978805394989,
989
+ "pos": 0.5731559854897219,
990
+ "q": 0.3
991
+ },
992
+ "eval_loss": 1.169406533241272,
993
+ "eval_macro_average_f1": 0.615626023934564,
994
+ "eval_micro_average_f1": 0.7462562396006656,
995
+ "eval_runtime": 7.4833,
996
+ "eval_samples_per_second": 321.249,
997
+ "eval_steps_per_second": 20.178,
998
+ "step": 5700
999
+ },
1000
+ {
1001
+ "epoch": 4.289940828402367,
1002
+ "eval_class_f1": {
1003
+ "neg": 0.803951367781155,
1004
+ "neu": 0.7902550437761706,
1005
+ "pos": 0.5685019206145967,
1006
+ "q": 0.30952380952380953
1007
+ },
1008
+ "eval_loss": 1.1968339681625366,
1009
+ "eval_macro_average_f1": 0.6180580354239329,
1010
+ "eval_micro_average_f1": 0.7495840266222962,
1011
+ "eval_runtime": 7.4382,
1012
+ "eval_samples_per_second": 323.197,
1013
+ "eval_steps_per_second": 20.301,
1014
+ "step": 5800
1015
+ },
1016
+ {
1017
+ "epoch": 4.363905325443787,
1018
+ "eval_class_f1": {
1019
+ "neg": 0.8024786986831913,
1020
+ "neu": 0.7916030534351146,
1021
+ "pos": 0.5773955773955775,
1022
+ "q": 0.3373493975903615
1023
+ },
1024
+ "eval_loss": 1.1896393299102783,
1025
+ "eval_macro_average_f1": 0.6272066817760612,
1026
+ "eval_micro_average_f1": 0.7504159733777038,
1027
+ "eval_runtime": 7.423,
1028
+ "eval_samples_per_second": 323.857,
1029
+ "eval_steps_per_second": 20.342,
1030
+ "step": 5900
1031
+ },
1032
+ {
1033
+ "epoch": 4.437869822485207,
1034
+ "grad_norm": 0.9506312608718872,
1035
+ "learning_rate": 8.708219178082192e-06,
1036
+ "loss": 0.1324,
1037
+ "step": 6000
1038
+ },
1039
+ {
1040
+ "epoch": 4.437869822485207,
1041
+ "eval_class_f1": {
1042
+ "neg": 0.8024316109422492,
1043
+ "neu": 0.7925840092699884,
1044
+ "pos": 0.5878048780487805,
1045
+ "q": 0.28915662650602414
1046
+ },
1047
+ "eval_loss": 1.2535008192062378,
1048
+ "eval_macro_average_f1": 0.6179942811917606,
1049
+ "eval_micro_average_f1": 0.7516638935108153,
1050
+ "eval_runtime": 7.3808,
1051
+ "eval_samples_per_second": 325.711,
1052
+ "eval_steps_per_second": 20.459,
1053
+ "step": 6000
1054
+ },
1055
+ {
1056
+ "epoch": 4.511834319526627,
1057
+ "eval_class_f1": {
1058
+ "neg": 0.7901821060965954,
1059
+ "neu": 0.7819374758780393,
1060
+ "pos": 0.5821064552661382,
1061
+ "q": 0.2535211267605634
1062
+ },
1063
+ "eval_loss": 1.2182434797286987,
1064
+ "eval_macro_average_f1": 0.601936791000334,
1065
+ "eval_micro_average_f1": 0.7396006655574043,
1066
+ "eval_runtime": 7.3498,
1067
+ "eval_samples_per_second": 327.085,
1068
+ "eval_steps_per_second": 20.545,
1069
+ "step": 6100
1070
+ },
1071
+ {
1072
+ "epoch": 4.585798816568047,
1073
+ "eval_class_f1": {
1074
+ "neg": 0.7945425361155697,
1075
+ "neu": 0.7956989247311828,
1076
+ "pos": 0.5761006289308177,
1077
+ "q": 0.22857142857142854
1078
+ },
1079
+ "eval_loss": 1.2836171388626099,
1080
+ "eval_macro_average_f1": 0.5987283795872497,
1081
+ "eval_micro_average_f1": 0.7508319467554077,
1082
+ "eval_runtime": 7.4332,
1083
+ "eval_samples_per_second": 323.412,
1084
+ "eval_steps_per_second": 20.314,
1085
+ "step": 6200
1086
+ },
1087
+ {
1088
+ "epoch": 4.659763313609467,
1089
+ "eval_class_f1": {
1090
+ "neg": 0.8003025718608169,
1091
+ "neu": 0.7733970529669454,
1092
+ "pos": 0.5691609977324262,
1093
+ "q": 0.3225806451612903
1094
+ },
1095
+ "eval_loss": 1.2842472791671753,
1096
+ "eval_macro_average_f1": 0.6163603169303697,
1097
+ "eval_micro_average_f1": 0.7346089850249584,
1098
+ "eval_runtime": 7.4941,
1099
+ "eval_samples_per_second": 320.786,
1100
+ "eval_steps_per_second": 20.149,
1101
+ "step": 6300
1102
+ },
1103
+ {
1104
+ "epoch": 4.733727810650888,
1105
+ "eval_class_f1": {
1106
+ "neg": 0.799375487900078,
1107
+ "neu": 0.7936865839909809,
1108
+ "pos": 0.5614489003880984,
1109
+ "q": 0.3225806451612903
1110
+ },
1111
+ "eval_loss": 1.3067219257354736,
1112
+ "eval_macro_average_f1": 0.619272904360112,
1113
+ "eval_micro_average_f1": 0.7487520798668885,
1114
+ "eval_runtime": 7.3805,
1115
+ "eval_samples_per_second": 325.723,
1116
+ "eval_steps_per_second": 20.459,
1117
+ "step": 6400
1118
+ },
1119
+ {
1120
+ "epoch": 4.8076923076923075,
1121
+ "grad_norm": 0.2732117772102356,
1122
+ "learning_rate": 6.653424657534246e-06,
1123
+ "loss": 0.1441,
1124
+ "step": 6500
1125
+ },
1126
+ {
1127
+ "epoch": 4.8076923076923075,
1128
+ "eval_class_f1": {
1129
+ "neg": 0.8027628549501151,
1130
+ "neu": 0.7751572327044026,
1131
+ "pos": 0.5694760820045559,
1132
+ "q": 0.3132530120481927
1133
+ },
1134
+ "eval_loss": 1.2718240022659302,
1135
+ "eval_macro_average_f1": 0.6151622954268166,
1136
+ "eval_micro_average_f1": 0.7371048252911814,
1137
+ "eval_runtime": 7.3822,
1138
+ "eval_samples_per_second": 325.649,
1139
+ "eval_steps_per_second": 20.455,
1140
+ "step": 6500
1141
+ },
1142
+ {
1143
+ "epoch": 4.881656804733728,
1144
+ "eval_class_f1": {
1145
+ "neg": 0.796875,
1146
+ "neu": 0.7868601986249045,
1147
+ "pos": 0.5735115431348725,
1148
+ "q": 0.29885057471264365
1149
+ },
1150
+ "eval_loss": 1.261472225189209,
1151
+ "eval_macro_average_f1": 0.6140243291181051,
1152
+ "eval_micro_average_f1": 0.7441763727121464,
1153
+ "eval_runtime": 7.5114,
1154
+ "eval_samples_per_second": 320.048,
1155
+ "eval_steps_per_second": 20.103,
1156
+ "step": 6600
1157
+ },
1158
+ {
1159
+ "epoch": 4.955621301775148,
1160
+ "eval_class_f1": {
1161
+ "neg": 0.7930763178599529,
1162
+ "neu": 0.7766536964980545,
1163
+ "pos": 0.5714285714285715,
1164
+ "q": 0.35294117647058826
1165
+ },
1166
+ "eval_loss": 1.2753080129623413,
1167
+ "eval_macro_average_f1": 0.6235249405642919,
1168
+ "eval_micro_average_f1": 0.7358569051580699,
1169
+ "eval_runtime": 7.3656,
1170
+ "eval_samples_per_second": 326.381,
1171
+ "eval_steps_per_second": 20.501,
1172
+ "step": 6700
1173
+ },
1174
+ {
1175
+ "epoch": 5.029585798816568,
1176
+ "eval_class_f1": {
1177
+ "neg": 0.7962962962962963,
1178
+ "neu": 0.7754943776657619,
1179
+ "pos": 0.5657276995305165,
1180
+ "q": 0.345679012345679
1181
+ },
1182
+ "eval_loss": 1.3079357147216797,
1183
+ "eval_macro_average_f1": 0.6207993464595634,
1184
+ "eval_micro_average_f1": 0.7366888519134775,
1185
+ "eval_runtime": 7.4026,
1186
+ "eval_samples_per_second": 324.753,
1187
+ "eval_steps_per_second": 20.398,
1188
+ "step": 6800
1189
+ },
1190
+ {
1191
+ "epoch": 5.103550295857988,
1192
+ "eval_class_f1": {
1193
+ "neg": 0.7972136222910216,
1194
+ "neu": 0.7786790266512167,
1195
+ "pos": 0.5721040189125295,
1196
+ "q": 0.3703703703703704
1197
+ },
1198
+ "eval_loss": 1.3499900102615356,
1199
+ "eval_macro_average_f1": 0.6295917595562845,
1200
+ "eval_micro_average_f1": 0.740432612312812,
1201
+ "eval_runtime": 7.3777,
1202
+ "eval_samples_per_second": 325.846,
1203
+ "eval_steps_per_second": 20.467,
1204
+ "step": 6900
1205
+ },
1206
+ {
1207
+ "epoch": 5.177514792899408,
1208
+ "grad_norm": 11.024497985839844,
1209
+ "learning_rate": 4.598630136986302e-06,
1210
+ "loss": 0.1111,
1211
+ "step": 7000
1212
+ },
1213
+ {
1214
+ "epoch": 5.177514792899408,
1215
+ "eval_class_f1": {
1216
+ "neg": 0.7956147220046985,
1217
+ "neu": 0.7807853602744949,
1218
+ "pos": 0.5693606755126658,
1219
+ "q": 0.3544303797468354
1220
+ },
1221
+ "eval_loss": 1.4051584005355835,
1222
+ "eval_macro_average_f1": 0.6250477843846737,
1223
+ "eval_micro_average_f1": 0.7412645590682196,
1224
+ "eval_runtime": 7.2531,
1225
+ "eval_samples_per_second": 331.444,
1226
+ "eval_steps_per_second": 20.819,
1227
+ "step": 7000
1228
+ },
1229
+ {
1230
+ "epoch": 5.2514792899408285,
1231
+ "eval_class_f1": {
1232
+ "neg": 0.7925407925407926,
1233
+ "neu": 0.7769230769230769,
1234
+ "pos": 0.5737898465171192,
1235
+ "q": 0.27027027027027023
1236
+ },
1237
+ "eval_loss": 1.4020917415618896,
1238
+ "eval_macro_average_f1": 0.6033809965628147,
1239
+ "eval_micro_average_f1": 0.7375207986688852,
1240
+ "eval_runtime": 7.404,
1241
+ "eval_samples_per_second": 324.69,
1242
+ "eval_steps_per_second": 20.394,
1243
+ "step": 7100
1244
+ },
1245
+ {
1246
+ "epoch": 5.325443786982248,
1247
+ "eval_class_f1": {
1248
+ "neg": 0.7949326999208235,
1249
+ "neu": 0.7753846153846154,
1250
+ "pos": 0.5727482678983833,
1251
+ "q": 0.27848101265822783
1252
+ },
1253
+ "eval_loss": 1.4238033294677734,
1254
+ "eval_macro_average_f1": 0.6053866489655125,
1255
+ "eval_micro_average_f1": 0.7358569051580699,
1256
+ "eval_runtime": 7.4328,
1257
+ "eval_samples_per_second": 323.433,
1258
+ "eval_steps_per_second": 20.315,
1259
+ "step": 7200
1260
+ },
1261
+ {
1262
+ "epoch": 5.399408284023669,
1263
+ "eval_class_f1": {
1264
+ "neg": 0.7969348659003831,
1265
+ "neu": 0.7798306389530408,
1266
+ "pos": 0.5721212121212121,
1267
+ "q": 0.3
1268
+ },
1269
+ "eval_loss": 1.4431192874908447,
1270
+ "eval_macro_average_f1": 0.612221679243659,
1271
+ "eval_micro_average_f1": 0.7408485856905158,
1272
+ "eval_runtime": 7.3682,
1273
+ "eval_samples_per_second": 326.266,
1274
+ "eval_steps_per_second": 20.493,
1275
+ "step": 7300
1276
+ },
1277
+ {
1278
+ "epoch": 5.4733727810650885,
1279
+ "eval_class_f1": {
1280
+ "neg": 0.7940717628705148,
1281
+ "neu": 0.783072817384674,
1282
+ "pos": 0.5703883495145632,
1283
+ "q": 0.3037974683544304
1284
+ },
1285
+ "eval_loss": 1.4316595792770386,
1286
+ "eval_macro_average_f1": 0.6128325995310456,
1287
+ "eval_micro_average_f1": 0.7416805324459235,
1288
+ "eval_runtime": 7.3736,
1289
+ "eval_samples_per_second": 326.03,
1290
+ "eval_steps_per_second": 20.479,
1291
+ "step": 7400
1292
+ },
1293
+ {
1294
+ "epoch": 5.547337278106509,
1295
+ "grad_norm": 0.4265735149383545,
1296
+ "learning_rate": 2.543835616438356e-06,
1297
+ "loss": 0.0933,
1298
+ "step": 7500
1299
+ },
1300
+ {
1301
+ "epoch": 5.547337278106509,
1302
+ "eval_class_f1": {
1303
+ "neg": 0.7978311386522074,
1304
+ "neu": 0.7788089713843775,
1305
+ "pos": 0.567409144196952,
1306
+ "q": 0.30769230769230765
1307
+ },
1308
+ "eval_loss": 1.4399964809417725,
1309
+ "eval_macro_average_f1": 0.6129353904814612,
1310
+ "eval_micro_average_f1": 0.7387687188019967,
1311
+ "eval_runtime": 7.2697,
1312
+ "eval_samples_per_second": 330.689,
1313
+ "eval_steps_per_second": 20.771,
1314
+ "step": 7500
1315
+ },
1316
+ {
1317
+ "epoch": 5.621301775147929,
1318
+ "eval_class_f1": {
1319
+ "neg": 0.7984375,
1320
+ "neu": 0.7812379853902346,
1321
+ "pos": 0.5714285714285714,
1322
+ "q": 0.32500000000000007
1323
+ },
1324
+ "eval_loss": 1.4240373373031616,
1325
+ "eval_macro_average_f1": 0.6190260142047015,
1326
+ "eval_micro_average_f1": 0.7412645590682196,
1327
+ "eval_runtime": 7.4341,
1328
+ "eval_samples_per_second": 323.375,
1329
+ "eval_steps_per_second": 20.312,
1330
+ "step": 7600
1331
+ },
1332
+ {
1333
+ "epoch": 5.695266272189349,
1334
+ "eval_class_f1": {
1335
+ "neg": 0.7987470634299139,
1336
+ "neu": 0.7843286420692278,
1337
+ "pos": 0.5703883495145632,
1338
+ "q": 0.30769230769230765
1339
+ },
1340
+ "eval_loss": 1.4332064390182495,
1341
+ "eval_macro_average_f1": 0.6152890906765031,
1342
+ "eval_micro_average_f1": 0.7437603993344426,
1343
+ "eval_runtime": 7.4434,
1344
+ "eval_samples_per_second": 322.969,
1345
+ "eval_steps_per_second": 20.286,
1346
+ "step": 7700
1347
+ },
1348
+ {
1349
+ "epoch": 5.769230769230769,
1350
+ "eval_class_f1": {
1351
+ "neg": 0.7981220657276996,
1352
+ "neu": 0.781874039938556,
1353
+ "pos": 0.5731132075471698,
1354
+ "q": 0.30769230769230765
1355
+ },
1356
+ "eval_loss": 1.4344979524612427,
1357
+ "eval_macro_average_f1": 0.6152004052264332,
1358
+ "eval_micro_average_f1": 0.7416805324459235,
1359
+ "eval_runtime": 7.3808,
1360
+ "eval_samples_per_second": 325.708,
1361
+ "eval_steps_per_second": 20.458,
1362
+ "step": 7800
1363
+ },
1364
+ {
1365
+ "epoch": 5.84319526627219,
1366
+ "eval_class_f1": {
1367
+ "neg": 0.7990654205607477,
1368
+ "neu": 0.7815384615384616,
1369
+ "pos": 0.5724465558194775,
1370
+ "q": 0.3414634146341463
1371
+ },
1372
+ "eval_loss": 1.4412455558776855,
1373
+ "eval_macro_average_f1": 0.6236284631382082,
1374
+ "eval_micro_average_f1": 0.7420965058236273,
1375
+ "eval_runtime": 7.3915,
1376
+ "eval_samples_per_second": 325.237,
1377
+ "eval_steps_per_second": 20.429,
1378
+ "step": 7900
1379
+ },
1380
+ {
1381
+ "epoch": 5.9171597633136095,
1382
+ "grad_norm": 16.41318702697754,
1383
+ "learning_rate": 4.89041095890411e-07,
1384
+ "loss": 0.1006,
1385
+ "step": 8000
1386
+ },
1387
+ {
1388
+ "epoch": 5.9171597633136095,
1389
+ "eval_class_f1": {
1390
+ "neg": 0.7987519500780033,
1391
+ "neu": 0.7813098429720413,
1392
+ "pos": 0.5700598802395208,
1393
+ "q": 0.32500000000000007
1394
+ },
1395
+ "eval_loss": 1.4469937086105347,
1396
+ "eval_macro_average_f1": 0.6187804183223914,
1397
+ "eval_micro_average_f1": 0.7416805324459235,
1398
+ "eval_runtime": 7.3689,
1399
+ "eval_samples_per_second": 326.236,
1400
+ "eval_steps_per_second": 20.492,
1401
+ "step": 8000
1402
+ },
1403
+ {
1404
+ "epoch": 5.991124260355029,
1405
+ "eval_class_f1": {
1406
+ "neg": 0.7990654205607477,
1407
+ "neu": 0.781441717791411,
1408
+ "pos": 0.569377990430622,
1409
+ "q": 0.32500000000000007
1410
+ },
1411
+ "eval_loss": 1.4454258680343628,
1412
+ "eval_macro_average_f1": 0.6187212821956952,
1413
+ "eval_micro_average_f1": 0.7416805324459235,
1414
+ "eval_runtime": 7.448,
1415
+ "eval_samples_per_second": 322.77,
1416
+ "eval_steps_per_second": 20.274,
1417
+ "step": 8100
1418
+ }
1419
+ ],
1420
+ "logging_steps": 500,
1421
+ "max_steps": 8112,
1422
+ "num_input_tokens_seen": 0,
1423
+ "num_train_epochs": 6,
1424
+ "save_steps": 100,
1425
+ "total_flos": 1.0485727069042368e+16,
1426
+ "train_batch_size": 16,
1427
+ "trial_name": null,
1428
+ "trial_params": null
1429
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d136bd3a72331c1ee1902b7c4073c9b6bb6f902bd3ce56bb0ff2ee4eab86fe5c
3
+ size 5048