Upload 12 files
Browse files- .gitattributes +1 -0
- added_tokens.json +837 -0
- config.json +40 -0
- model.safetensors +3 -0
- optimizer.pt +3 -0
- rng_state.pth +3 -0
- scheduler.pt +3 -0
- sentencepiece.bpe.model +3 -0
- special_tokens_map.json +56 -0
- tokenizer.json +3 -0
- tokenizer_config.json +0 -0
- trainer_state.json +1429 -0
- training_args.bin +3 -0
.gitattributes
CHANGED
@@ -34,3 +34,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
checkpoint-8100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
|
|
|
34 |
*.zst filter=lfs diff=lfs merge=lfs -text
|
35 |
*tfevents* filter=lfs diff=lfs merge=lfs -text
|
36 |
checkpoint-8100/tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
37 |
+
tokenizer.json filter=lfs diff=lfs merge=lfs -text
|
added_tokens.json
ADDED
@@ -0,0 +1,837 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"‼": 248733,
|
3 |
+
"⁉": 248850,
|
4 |
+
"⌛": 248707,
|
5 |
+
"⌨": 248450,
|
6 |
+
"⏏": 248572,
|
7 |
+
"⏫": 248890,
|
8 |
+
"⏭": 249212,
|
9 |
+
"⏮": 248920,
|
10 |
+
"⏯": 248576,
|
11 |
+
"⏲": 248922,
|
12 |
+
"⏸": 249193,
|
13 |
+
"⏹": 249213,
|
14 |
+
"Ⓜ": 248957,
|
15 |
+
"♉": 248638,
|
16 |
+
"♊": 248741,
|
17 |
+
"♌": 249062,
|
18 |
+
"♍": 248609,
|
19 |
+
"♎": 248611,
|
20 |
+
"♏": 249129,
|
21 |
+
"♑": 248994,
|
22 |
+
"♓": 248790,
|
23 |
+
"♾": 248725,
|
24 |
+
"♿": 248454,
|
25 |
+
"⚒": 248845,
|
26 |
+
"⚖": 248534,
|
27 |
+
"⚗": 248943,
|
28 |
+
"⚙": 248676,
|
29 |
+
"⚛": 248761,
|
30 |
+
"⚧": 248971,
|
31 |
+
"⚰": 248930,
|
32 |
+
"⚱": 249010,
|
33 |
+
"⛈": 249232,
|
34 |
+
"⛎": 248516,
|
35 |
+
"⛏": 248490,
|
36 |
+
"⛑": 248575,
|
37 |
+
"⛩": 248953,
|
38 |
+
"⛪": 249120,
|
39 |
+
"⛲": 249141,
|
40 |
+
"⛴": 248779,
|
41 |
+
"⛷": 248649,
|
42 |
+
"⛸": 248693,
|
43 |
+
"⛹": 249260,
|
44 |
+
"⛽": 249248,
|
45 |
+
"✝": 248664,
|
46 |
+
"➗": 248803,
|
47 |
+
"➰": 248665,
|
48 |
+
"➿": 248458,
|
49 |
+
"⬛": 249000,
|
50 |
+
"⬜": 248821,
|
51 |
+
"〽": 248791,
|
52 |
+
"㊗": 248521,
|
53 |
+
"㊙": 249060,
|
54 |
+
"🀄": 248929,
|
55 |
+
"🃏": 248468,
|
56 |
+
"🅿": 248755,
|
57 |
+
"🆎": 248619,
|
58 |
+
"🆒": 248798,
|
59 |
+
"🆕": 248655,
|
60 |
+
"🆖": 248837,
|
61 |
+
"🆗": 248917,
|
62 |
+
"🆙": 248978,
|
63 |
+
"🆚": 248972,
|
64 |
+
"🈁": 248495,
|
65 |
+
"🈂": 248844,
|
66 |
+
"🈚": 249053,
|
67 |
+
"🈯": 248568,
|
68 |
+
"🈲": 248512,
|
69 |
+
"🈳": 248785,
|
70 |
+
"🈴": 249003,
|
71 |
+
"🈵": 248752,
|
72 |
+
"🈶": 248502,
|
73 |
+
"🈷": 249153,
|
74 |
+
"🈸": 249079,
|
75 |
+
"🈹": 248690,
|
76 |
+
"🈺": 248472,
|
77 |
+
"🉐": 249070,
|
78 |
+
"🉑": 249224,
|
79 |
+
"🌁": 248694,
|
80 |
+
"🌂": 248584,
|
81 |
+
"🌃": 248739,
|
82 |
+
"🌇": 248528,
|
83 |
+
"🌉": 248964,
|
84 |
+
"🌌": 248587,
|
85 |
+
"🌑": 249069,
|
86 |
+
"🌒": 249187,
|
87 |
+
"🌓": 248598,
|
88 |
+
"🌔": 248514,
|
89 |
+
"🌕": 248452,
|
90 |
+
"🌖": 249242,
|
91 |
+
"🌗": 248478,
|
92 |
+
"🌘": 248782,
|
93 |
+
"🌚": 249190,
|
94 |
+
"🌛": 248984,
|
95 |
+
"🌜": 248635,
|
96 |
+
"🌠": 248623,
|
97 |
+
"🌡": 248904,
|
98 |
+
"🌤": 248812,
|
99 |
+
"🌥": 248881,
|
100 |
+
"🌦": 249172,
|
101 |
+
"🌧": 248666,
|
102 |
+
"🌨": 248730,
|
103 |
+
"🌩": 248795,
|
104 |
+
"🌪": 249022,
|
105 |
+
"🌫": 248734,
|
106 |
+
"🌬": 248530,
|
107 |
+
"🌭": 248868,
|
108 |
+
"🌮": 248976,
|
109 |
+
"🌯": 248716,
|
110 |
+
"🌰": 248872,
|
111 |
+
"🌵": 249081,
|
112 |
+
"🍈": 249173,
|
113 |
+
"🍐": 248933,
|
114 |
+
"🍖": 248742,
|
115 |
+
"🍗": 248433,
|
116 |
+
"🍘": 248647,
|
117 |
+
"🍙": 249057,
|
118 |
+
"🍛": 248869,
|
119 |
+
"🍜": 248979,
|
120 |
+
"🍝": 249087,
|
121 |
+
"🍞": 248697,
|
122 |
+
"🍟": 248748,
|
123 |
+
"🍠": 248735,
|
124 |
+
"🍡": 248591,
|
125 |
+
"🍢": 248833,
|
126 |
+
"🍣": 248744,
|
127 |
+
"🍤": 248777,
|
128 |
+
"🍥": 248722,
|
129 |
+
"🍧": 248692,
|
130 |
+
"🍩": 248935,
|
131 |
+
"🍭": 248923,
|
132 |
+
"🍮": 249067,
|
133 |
+
"🍱": 248715,
|
134 |
+
"🍵": 248873,
|
135 |
+
"🍶": 248814,
|
136 |
+
"🍸": 249251,
|
137 |
+
"🍼": 248650,
|
138 |
+
"🎃": 248736,
|
139 |
+
"🎆": 248678,
|
140 |
+
"🎇": 248589,
|
141 |
+
"🎌": 248805,
|
142 |
+
"🎍": 248892,
|
143 |
+
"🎎": 249231,
|
144 |
+
"🎏": 248670,
|
145 |
+
"🎐": 249014,
|
146 |
+
"🎑": 248954,
|
147 |
+
"🎒": 249072,
|
148 |
+
"🎚": 249103,
|
149 |
+
"🎛": 248704,
|
150 |
+
"🎟": 248802,
|
151 |
+
"🎠": 249005,
|
152 |
+
"🎡": 248932,
|
153 |
+
"🎣": 249137,
|
154 |
+
"🎦": 248858,
|
155 |
+
"🎪": 249086,
|
156 |
+
"🎫": 249258,
|
157 |
+
"🎰": 248683,
|
158 |
+
"🎱": 248961,
|
159 |
+
"🎲": 248455,
|
160 |
+
"🎳": 249073,
|
161 |
+
"🎴": 248769,
|
162 |
+
"🎷": 248451,
|
163 |
+
"🎹": 248503,
|
164 |
+
"🎺": 248586,
|
165 |
+
"🎻": 249148,
|
166 |
+
"🎽": 248577,
|
167 |
+
"🎾": 249108,
|
168 |
+
"🎿": 248915,
|
169 |
+
"🏁": 249031,
|
170 |
+
"🏂": 248792,
|
171 |
+
"🏅": 249063,
|
172 |
+
"🏇": 248612,
|
173 |
+
"🏈": 248891,
|
174 |
+
"🏉": 248434,
|
175 |
+
"🏊": 249052,
|
176 |
+
"🏍": 248525,
|
177 |
+
"🏎": 248992,
|
178 |
+
"🏏": 248515,
|
179 |
+
"🏐": 248660,
|
180 |
+
"🏑": 249144,
|
181 |
+
"🏒": 248866,
|
182 |
+
"🏓": 248459,
|
183 |
+
"🏔": 248588,
|
184 |
+
"🏕": 248916,
|
185 |
+
"🏗": 249098,
|
186 |
+
"🏘": 249199,
|
187 |
+
"🏙": 248567,
|
188 |
+
"🏚": 248888,
|
189 |
+
"🏛": 248914,
|
190 |
+
"🏜": 248685,
|
191 |
+
"🏞": 249200,
|
192 |
+
"🏟": 248815,
|
193 |
+
"🏣": 248828,
|
194 |
+
"🏤": 248773,
|
195 |
+
"🏥": 248835,
|
196 |
+
"🏦": 248553,
|
197 |
+
"🏧": 248642,
|
198 |
+
"🏨": 248561,
|
199 |
+
"🏩": 249182,
|
200 |
+
"🏪": 248999,
|
201 |
+
"🏬": 248996,
|
202 |
+
"🏭": 248934,
|
203 |
+
"🏮": 248644,
|
204 |
+
"🏯": 249175,
|
205 |
+
"🏰": 248774,
|
206 |
+
"🏴": 248501,
|
207 |
+
"🏷": 248427,
|
208 |
+
"🏸": 248804,
|
209 |
+
"🏹": 249130,
|
210 |
+
"🏺": 249250,
|
211 |
+
"🐀": 249257,
|
212 |
+
"🐁": 249252,
|
213 |
+
"🐂": 248621,
|
214 |
+
"🐃": 248695,
|
215 |
+
"🐄": 249194,
|
216 |
+
"🐅": 249117,
|
217 |
+
"🐆": 249026,
|
218 |
+
"🐇": 249015,
|
219 |
+
"🐈": 248527,
|
220 |
+
"🐉": 248462,
|
221 |
+
"🐊": 248574,
|
222 |
+
"🐋": 249208,
|
223 |
+
"🐌": 248880,
|
224 |
+
"🐎": 248479,
|
225 |
+
"🐏": 248865,
|
226 |
+
"🐐": 249198,
|
227 |
+
"🐑": 249122,
|
228 |
+
"🐓": 248680,
|
229 |
+
"🐔": 249068,
|
230 |
+
"🐕": 248756,
|
231 |
+
"🐖": 248603,
|
232 |
+
"🐗": 248750,
|
233 |
+
"🐙": 248701,
|
234 |
+
"🐚": 248886,
|
235 |
+
"🐛": 248540,
|
236 |
+
"🐜": 249249,
|
237 |
+
"🐟": 248912,
|
238 |
+
"🐠": 249253,
|
239 |
+
"🐡": 248480,
|
240 |
+
"🐢": 249210,
|
241 |
+
"🐧": 249254,
|
242 |
+
"🐨": 249019,
|
243 |
+
"🐩": 248580,
|
244 |
+
"🐪": 248775,
|
245 |
+
"🐫": 248505,
|
246 |
+
"🐬": 249002,
|
247 |
+
"🐭": 248615,
|
248 |
+
"🐮": 249074,
|
249 |
+
"🐵": 248533,
|
250 |
+
"🐹": 248441,
|
251 |
+
"🐺": 249055,
|
252 |
+
"🐽": 248602,
|
253 |
+
"🐿": 248823,
|
254 |
+
"👂": 249140,
|
255 |
+
"👓": 249170,
|
256 |
+
"👘": 248542,
|
257 |
+
"👛": 249217,
|
258 |
+
"👝": 248709,
|
259 |
+
"👞": 248728,
|
260 |
+
"👡": 248549,
|
261 |
+
"👢": 248910,
|
262 |
+
"👲": 249001,
|
263 |
+
"👴": 248820,
|
264 |
+
"👵": 248960,
|
265 |
+
"👷": 249146,
|
266 |
+
"👹": 249099,
|
267 |
+
"👺": 249106,
|
268 |
+
"👽": 248883,
|
269 |
+
"👾": 249059,
|
270 |
+
"💂": 248778,
|
271 |
+
"💈": 248554,
|
272 |
+
"💒": 248594,
|
273 |
+
"💤": 248466,
|
274 |
+
"💨": 248913,
|
275 |
+
"💩": 248508,
|
276 |
+
"💱": 249100,
|
277 |
+
"💴": 248931,
|
278 |
+
"💷": 248711,
|
279 |
+
"💹": 249158,
|
280 |
+
"💺": 248831,
|
281 |
+
"💽": 249021,
|
282 |
+
"💾": 249215,
|
283 |
+
"💿": 248604,
|
284 |
+
"📀": 248518,
|
285 |
+
"📁": 248444,
|
286 |
+
"📂": 248496,
|
287 |
+
"📄": 248801,
|
288 |
+
"📆": 248487,
|
289 |
+
"📇": 249007,
|
290 |
+
"📈": 248438,
|
291 |
+
"📉": 248617,
|
292 |
+
"📊": 248473,
|
293 |
+
"📏": 248556,
|
294 |
+
"📐": 248679,
|
295 |
+
"📑": 249256,
|
296 |
+
"📒": 248710,
|
297 |
+
"📓": 248632,
|
298 |
+
"📔": 248909,
|
299 |
+
"📗": 248807,
|
300 |
+
"📘": 248708,
|
301 |
+
"📙": 249160,
|
302 |
+
"📛": 248484,
|
303 |
+
"📟": 248727,
|
304 |
+
"📠": 249065,
|
305 |
+
"📡": 248532,
|
306 |
+
"📤": 248718,
|
307 |
+
"📨": 249196,
|
308 |
+
"📪": 248565,
|
309 |
+
"📫": 248469,
|
310 |
+
"📭": 248998,
|
311 |
+
"📯": 249220,
|
312 |
+
"📳": 248471,
|
313 |
+
"📴": 249004,
|
314 |
+
"📵": 249013,
|
315 |
+
"📶": 248969,
|
316 |
+
"📻": 249084,
|
317 |
+
"📼": 249230,
|
318 |
+
"📿": 249181,
|
319 |
+
"🔀": 248946,
|
320 |
+
"🔂": 248863,
|
321 |
+
"🔃": 249050,
|
322 |
+
"🔄": 248995,
|
323 |
+
"🔆": 249046,
|
324 |
+
"🔇": 249009,
|
325 |
+
"🔈": 248559,
|
326 |
+
"🔉": 248590,
|
327 |
+
"🔋": 248907,
|
328 |
+
"🔌": 248842,
|
329 |
+
"🔏": 248622,
|
330 |
+
"🔐": 248546,
|
331 |
+
"🔑": 248937,
|
332 |
+
"🔒": 248498,
|
333 |
+
"🔓": 249058,
|
334 |
+
"🔕": 248780,
|
335 |
+
"🔙": 249206,
|
336 |
+
"🔚": 248903,
|
337 |
+
"🔛": 248671,
|
338 |
+
"🔟": 249189,
|
339 |
+
"🔠": 249075,
|
340 |
+
"🔡": 248600,
|
341 |
+
"🔢": 249142,
|
342 |
+
"🔣": 248579,
|
343 |
+
"🔤": 248997,
|
344 |
+
"🔦": 248813,
|
345 |
+
"🔧": 248618,
|
346 |
+
"🔨": 248849,
|
347 |
+
"🔩": 248781,
|
348 |
+
"🔪": 248906,
|
349 |
+
"🔭": 249261,
|
350 |
+
"🔯": 248674,
|
351 |
+
"🔲": 248988,
|
352 |
+
"🔳": 248985,
|
353 |
+
"🔼": 248754,
|
354 |
+
"🔽": 248581,
|
355 |
+
"🕋": 249027,
|
356 |
+
"🕌": 249241,
|
357 |
+
"🕍": 248719,
|
358 |
+
"🕎": 248626,
|
359 |
+
"🕐": 248855,
|
360 |
+
"🕑": 248818,
|
361 |
+
"🕒": 248485,
|
362 |
+
"🕓": 248465,
|
363 |
+
"🕕": 248494,
|
364 |
+
"🕖": 248847,
|
365 |
+
"🕗": 249221,
|
366 |
+
"🕘": 248552,
|
367 |
+
"🕙": 248539,
|
368 |
+
"🕚": 248607,
|
369 |
+
"🕛": 248889,
|
370 |
+
"🕜": 248513,
|
371 |
+
"🕝": 248763,
|
372 |
+
"🕞": 248601,
|
373 |
+
"🕟": 248884,
|
374 |
+
"🕠": 249109,
|
375 |
+
"🕡": 248982,
|
376 |
+
"🕢": 248896,
|
377 |
+
"🕣": 248854,
|
378 |
+
"🕤": 248682,
|
379 |
+
"🕥": 248627,
|
380 |
+
"🕦": 248938,
|
381 |
+
"🕧": 248482,
|
382 |
+
"🕰": 248810,
|
383 |
+
"🕳": 248700,
|
384 |
+
"🕴": 248483,
|
385 |
+
"🕶": 248688,
|
386 |
+
"🕷": 248437,
|
387 |
+
"🕸": 248749,
|
388 |
+
"🕹": 248871,
|
389 |
+
"🖇": 248563,
|
390 |
+
"🖊": 248760,
|
391 |
+
"🖌": 249209,
|
392 |
+
"🖍": 248560,
|
393 |
+
"🖕": 249105,
|
394 |
+
"🖖": 248864,
|
395 |
+
"🖨": 248947,
|
396 |
+
"🖱": 248784,
|
397 |
+
"🖲": 248766,
|
398 |
+
"🖼": 248506,
|
399 |
+
"🗂": 248631,
|
400 |
+
"🗃": 248497,
|
401 |
+
"🗄": 248980,
|
402 |
+
"🗑": 249088,
|
403 |
+
"🗒": 249048,
|
404 |
+
"🗜": 248764,
|
405 |
+
"🗝": 248629,
|
406 |
+
"🗞": 248867,
|
407 |
+
"🗡": 249134,
|
408 |
+
"🗨": 249150,
|
409 |
+
"🗯": 248585,
|
410 |
+
"🗳": 248461,
|
411 |
+
"🗺": 248493,
|
412 |
+
"🗻": 249133,
|
413 |
+
"🗾": 248464,
|
414 |
+
"🗿": 249135,
|
415 |
+
"😧": 249227,
|
416 |
+
"😸": 248908,
|
417 |
+
"😺": 248851,
|
418 |
+
"😼": 248614,
|
419 |
+
"😽": 249121,
|
420 |
+
"😾": 248675,
|
421 |
+
"😿": 248796,
|
422 |
+
"🙉": 248633,
|
423 |
+
"🙍": 249066,
|
424 |
+
"🙎": 248808,
|
425 |
+
"🚁": 248677,
|
426 |
+
"🚂": 248504,
|
427 |
+
"🚃": 249184,
|
428 |
+
"🚄": 248648,
|
429 |
+
"🚅": 249102,
|
430 |
+
"🚆": 248974,
|
431 |
+
"🚈": 248901,
|
432 |
+
"🚉": 248856,
|
433 |
+
"🚊": 249178,
|
434 |
+
"🚋": 248878,
|
435 |
+
"🚍": 249016,
|
436 |
+
"🚎": 249163,
|
437 |
+
"🚏": 249119,
|
438 |
+
"🚐": 248731,
|
439 |
+
"🚑": 249188,
|
440 |
+
"🚒": 249225,
|
441 |
+
"🚓": 248817,
|
442 |
+
"🚔": 249259,
|
443 |
+
"🚕": 248608,
|
444 |
+
"🚖": 248667,
|
445 |
+
"🚜": 249076,
|
446 |
+
"🚝": 248882,
|
447 |
+
"🚞": 248596,
|
448 |
+
"🚟": 249154,
|
449 |
+
"🚠": 248640,
|
450 |
+
"🚡": 249020,
|
451 |
+
"🚢": 248547,
|
452 |
+
"🚣": 249186,
|
453 |
+
"🚤": 249204,
|
454 |
+
"🚥": 249164,
|
455 |
+
"🚦": 248717,
|
456 |
+
"🚧": 249143,
|
457 |
+
"🚪": 248861,
|
458 |
+
"🚬": 248918,
|
459 |
+
"🚭": 248836,
|
460 |
+
"🚮": 248583,
|
461 |
+
"🚯": 248659,
|
462 |
+
"🚰": 249202,
|
463 |
+
"🚱": 248786,
|
464 |
+
"🚳": 248758,
|
465 |
+
"🚷": 249149,
|
466 |
+
"🚸": 248550,
|
467 |
+
"🚹": 249024,
|
468 |
+
"🚺": 249044,
|
469 |
+
"🚻": 248636,
|
470 |
+
"🚼": 249214,
|
471 |
+
"🚽": 248726,
|
472 |
+
"🚾": 249205,
|
473 |
+
"🚿": 248941,
|
474 |
+
"🛀": 249097,
|
475 |
+
"🛁": 248430,
|
476 |
+
"🛂": 248770,
|
477 |
+
"🛃": 248732,
|
478 |
+
"🛄": 248879,
|
479 |
+
"🛅": 249041,
|
480 |
+
"🛋": 249203,
|
481 |
+
"🛎": 249113,
|
482 |
+
"🛏": 249078,
|
483 |
+
"🛐": 248830,
|
484 |
+
"🛕": 248859,
|
485 |
+
"🛖": 248435,
|
486 |
+
"🛗": 248551,
|
487 |
+
"🛜": 248519,
|
488 |
+
"🛝": 248510,
|
489 |
+
"🛞": 248897,
|
490 |
+
"🛟": 248981,
|
491 |
+
"🛠": 248966,
|
492 |
+
"🛡": 248811,
|
493 |
+
"🛢": 249240,
|
494 |
+
"🛣": 249040,
|
495 |
+
"🛤": 248973,
|
496 |
+
"🛥": 248436,
|
497 |
+
"🛩": 249156,
|
498 |
+
"🛬": 248951,
|
499 |
+
"🛰": 248656,
|
500 |
+
"🛳": 249123,
|
501 |
+
"🛴": 249111,
|
502 |
+
"🛵": 248857,
|
503 |
+
"🛶": 248940,
|
504 |
+
"🛷": 248824,
|
505 |
+
"🛸": 248562,
|
506 |
+
"🛹": 248669,
|
507 |
+
"🛺": 248569,
|
508 |
+
"🛻": 249017,
|
509 |
+
"🛼": 248991,
|
510 |
+
"🟠": 249195,
|
511 |
+
"🟡": 248460,
|
512 |
+
"🟢": 248489,
|
513 |
+
"🟣": 248737,
|
514 |
+
"🟤": 248936,
|
515 |
+
"🟥": 249239,
|
516 |
+
"🟦": 248491,
|
517 |
+
"🟧": 249166,
|
518 |
+
"🟨": 248765,
|
519 |
+
"🟩": 248439,
|
520 |
+
"🟪": 248787,
|
521 |
+
"🟫": 249255,
|
522 |
+
"🟰": 248874,
|
523 |
+
"🤌": 248620,
|
524 |
+
"🤍": 248967,
|
525 |
+
"🤎": 249116,
|
526 |
+
"🤏": 248699,
|
527 |
+
"🤐": 248806,
|
528 |
+
"🤒": 248610,
|
529 |
+
"🤕": 248475,
|
530 |
+
"🤚": 249011,
|
531 |
+
"🤛": 249107,
|
532 |
+
"🤜": 249152,
|
533 |
+
"🤠": 248862,
|
534 |
+
"🤢": 249023,
|
535 |
+
"🤥": 248628,
|
536 |
+
"🤧": 249064,
|
537 |
+
"🤫": 248706,
|
538 |
+
"🤬": 249219,
|
539 |
+
"🤮": 248819,
|
540 |
+
"🤯": 249096,
|
541 |
+
"🤰": 249207,
|
542 |
+
"🤱": 248925,
|
543 |
+
"🤳": 248541,
|
544 |
+
"🤴": 248573,
|
545 |
+
"🤶": 249094,
|
546 |
+
"🤸": 248809,
|
547 |
+
"🤹": 248570,
|
548 |
+
"🤺": 248443,
|
549 |
+
"🤼": 249115,
|
550 |
+
"🤽": 248794,
|
551 |
+
"🤾": 248721,
|
552 |
+
"🤿": 249191,
|
553 |
+
"🥁": 249118,
|
554 |
+
"🥃": 248788,
|
555 |
+
"🥄": 248944,
|
556 |
+
"🥅": 249101,
|
557 |
+
"🥈": 248899,
|
558 |
+
"🥉": 248762,
|
559 |
+
"🥊": 248768,
|
560 |
+
"🥋": 248772,
|
561 |
+
"🥌": 249177,
|
562 |
+
"🥍": 249051,
|
563 |
+
"🥎": 248645,
|
564 |
+
"🥏": 248557,
|
565 |
+
"🥐": 249131,
|
566 |
+
"🥑": 248511,
|
567 |
+
"🥒": 249229,
|
568 |
+
"🥓": 248751,
|
569 |
+
"🥔": 248905,
|
570 |
+
"🥕": 248838,
|
571 |
+
"🥖": 248470,
|
572 |
+
"🥗": 248745,
|
573 |
+
"🥘": 248445,
|
574 |
+
"🥙": 248463,
|
575 |
+
"🥚": 248520,
|
576 |
+
"🥛": 248582,
|
577 |
+
"🥜": 249083,
|
578 |
+
"🥝": 248848,
|
579 |
+
"🥞": 248453,
|
580 |
+
"🥟": 249033,
|
581 |
+
"🥠": 248942,
|
582 |
+
"🥡": 248720,
|
583 |
+
"🥢": 249168,
|
584 |
+
"🥤": 248949,
|
585 |
+
"🥥": 248956,
|
586 |
+
"🥦": 248432,
|
587 |
+
"🥧": 248595,
|
588 |
+
"🥨": 248799,
|
589 |
+
"🥩": 248928,
|
590 |
+
"🥪": 248963,
|
591 |
+
"🥫": 248509,
|
592 |
+
"🥬": 249237,
|
593 |
+
"🥭": 248634,
|
594 |
+
"🥮": 248446,
|
595 |
+
"🥯": 248757,
|
596 |
+
"🥱": 249244,
|
597 |
+
"🥲": 248429,
|
598 |
+
"🥳": 249128,
|
599 |
+
"🥴": 248860,
|
600 |
+
"🥵": 249029,
|
601 |
+
"🥶": 249147,
|
602 |
+
"🥷": 248702,
|
603 |
+
"🥸": 248486,
|
604 |
+
"🥹": 248431,
|
605 |
+
"🥻": 249039,
|
606 |
+
"🥼": 248832,
|
607 |
+
"🥽": 248875,
|
608 |
+
"🥾": 249056,
|
609 |
+
"🥿": 249138,
|
610 |
+
"🦀": 249201,
|
611 |
+
"🦂": 249034,
|
612 |
+
"🦃": 248834,
|
613 |
+
"🦆": 249042,
|
614 |
+
"🦇": 248543,
|
615 |
+
"🦈": 249139,
|
616 |
+
"🦉": 249008,
|
617 |
+
"🦌": 248625,
|
618 |
+
"🦍": 248713,
|
619 |
+
"🦎": 248911,
|
620 |
+
"🦏": 248555,
|
621 |
+
"🦐": 248950,
|
622 |
+
"🦑": 248746,
|
623 |
+
"🦒": 248843,
|
624 |
+
"🦓": 248789,
|
625 |
+
"🦔": 248641,
|
626 |
+
"🦕": 248654,
|
627 |
+
"🦖": 248686,
|
628 |
+
"🦗": 249235,
|
629 |
+
"🦘": 248825,
|
630 |
+
"🦙": 248523,
|
631 |
+
"🦚": 248797,
|
632 |
+
"🦛": 249080,
|
633 |
+
"🦜": 248816,
|
634 |
+
"🦝": 248661,
|
635 |
+
"🦞": 249045,
|
636 |
+
"🦟": 248753,
|
637 |
+
"🦠": 249085,
|
638 |
+
"🦡": 249246,
|
639 |
+
"🦢": 248965,
|
640 |
+
"🦣": 248578,
|
641 |
+
"🦤": 248662,
|
642 |
+
"🦥": 248939,
|
643 |
+
"🦦": 249161,
|
644 |
+
"🦧": 249030,
|
645 |
+
"🦨": 248776,
|
646 |
+
"🦩": 248624,
|
647 |
+
"🦪": 248738,
|
648 |
+
"🦫": 248566,
|
649 |
+
"🦬": 249245,
|
650 |
+
"🦭": 249159,
|
651 |
+
"🦮": 248517,
|
652 |
+
"🦯": 249089,
|
653 |
+
"🦰": 249132,
|
654 |
+
"🦱": 248613,
|
655 |
+
"🦲": 249047,
|
656 |
+
"🦳": 248927,
|
657 |
+
"🦴": 248691,
|
658 |
+
"🦵": 248885,
|
659 |
+
"🦶": 248876,
|
660 |
+
"🦷": 248771,
|
661 |
+
"🦸": 248538,
|
662 |
+
"🦹": 248958,
|
663 |
+
"🦺": 248948,
|
664 |
+
"🦻": 248729,
|
665 |
+
"🦼": 248639,
|
666 |
+
"🦽": 248959,
|
667 |
+
"🦾": 248507,
|
668 |
+
"🦿": 249222,
|
669 |
+
"🧀": 248712,
|
670 |
+
"🧁": 248840,
|
671 |
+
"🧂": 249018,
|
672 |
+
"🧃": 249125,
|
673 |
+
"🧄": 248723,
|
674 |
+
"🧅": 248895,
|
675 |
+
"🧆": 248571,
|
676 |
+
"🧇": 248767,
|
677 |
+
"🧈": 249211,
|
678 |
+
"🧉": 248684,
|
679 |
+
"🧊": 249092,
|
680 |
+
"🧋": 248696,
|
681 |
+
"🧌": 249061,
|
682 |
+
"🧍": 249218,
|
683 |
+
"🧎": 248537,
|
684 |
+
"🧏": 248544,
|
685 |
+
"🧐": 249095,
|
686 |
+
"🧑": 248672,
|
687 |
+
"🧒": 248536,
|
688 |
+
"🧓": 249145,
|
689 |
+
"🧔": 249110,
|
690 |
+
"🧕": 249176,
|
691 |
+
"🧖": 249025,
|
692 |
+
"🧗": 248853,
|
693 |
+
"🧘": 248663,
|
694 |
+
"🧙": 248605,
|
695 |
+
"🧛": 248887,
|
696 |
+
"🧝": 248524,
|
697 |
+
"🧞": 248558,
|
698 |
+
"🧟": 249197,
|
699 |
+
"🧠": 248893,
|
700 |
+
"🧢": 249035,
|
701 |
+
"🧣": 249155,
|
702 |
+
"🧤": 248740,
|
703 |
+
"🧥": 248657,
|
704 |
+
"🧦": 248900,
|
705 |
+
"🧧": 248428,
|
706 |
+
"🧨": 248793,
|
707 |
+
"🧩": 249234,
|
708 |
+
"🧪": 249192,
|
709 |
+
"🧫": 248975,
|
710 |
+
"🧬": 248681,
|
711 |
+
"🧭": 249180,
|
712 |
+
"🧮": 249049,
|
713 |
+
"🧯": 249167,
|
714 |
+
"🧰": 248593,
|
715 |
+
"🧱": 249112,
|
716 |
+
"🧲": 249082,
|
717 |
+
"🧳": 249179,
|
718 |
+
"🧴": 249104,
|
719 |
+
"🧵": 249169,
|
720 |
+
"🧶": 248651,
|
721 |
+
"🧷": 248977,
|
722 |
+
"🧸": 248488,
|
723 |
+
"🧹": 249233,
|
724 |
+
"🧺": 248921,
|
725 |
+
"🧻": 249236,
|
726 |
+
"🧼": 248993,
|
727 |
+
"🧽": 248545,
|
728 |
+
"🧾": 248827,
|
729 |
+
"🧿": 249114,
|
730 |
+
"🩰": 248852,
|
731 |
+
"🩱": 248529,
|
732 |
+
"🩲": 249171,
|
733 |
+
"🩳": 248970,
|
734 |
+
"🩴": 248705,
|
735 |
+
"🩵": 248989,
|
736 |
+
"🩶": 249243,
|
737 |
+
"🩷": 248945,
|
738 |
+
"🩸": 248839,
|
739 |
+
"🩹": 248477,
|
740 |
+
"🩺": 248983,
|
741 |
+
"🩻": 248474,
|
742 |
+
"🩼": 249127,
|
743 |
+
"🪀": 249216,
|
744 |
+
"🪁": 249183,
|
745 |
+
"🪂": 248448,
|
746 |
+
"🪃": 248689,
|
747 |
+
"🪄": 248653,
|
748 |
+
"🪅": 248440,
|
749 |
+
"🪆": 248616,
|
750 |
+
"🪇": 249238,
|
751 |
+
"🪈": 249038,
|
752 |
+
"🪐": 249032,
|
753 |
+
"🪑": 249185,
|
754 |
+
"🪒": 248467,
|
755 |
+
"🪓": 249028,
|
756 |
+
"🪔": 248919,
|
757 |
+
"🪕": 248759,
|
758 |
+
"🪖": 248447,
|
759 |
+
"🪗": 249006,
|
760 |
+
"🪘": 248643,
|
761 |
+
"🪙": 248476,
|
762 |
+
"🪚": 249165,
|
763 |
+
"🪛": 249077,
|
764 |
+
"🪜": 248597,
|
765 |
+
"🪝": 248714,
|
766 |
+
"🪞": 248687,
|
767 |
+
"🪟": 249226,
|
768 |
+
"🪠": 248499,
|
769 |
+
"🪡": 248841,
|
770 |
+
"🪢": 249124,
|
771 |
+
"🪣": 248955,
|
772 |
+
"🪤": 248668,
|
773 |
+
"🪥": 248599,
|
774 |
+
"🪦": 248902,
|
775 |
+
"🪧": 248535,
|
776 |
+
"🪨": 248673,
|
777 |
+
"🪩": 248962,
|
778 |
+
"🪪": 249223,
|
779 |
+
"🪫": 248990,
|
780 |
+
"🪬": 248564,
|
781 |
+
"🪭": 248926,
|
782 |
+
"🪮": 248630,
|
783 |
+
"🪯": 248822,
|
784 |
+
"🪰": 248829,
|
785 |
+
"🪱": 249162,
|
786 |
+
"🪲": 248522,
|
787 |
+
"🪳": 249037,
|
788 |
+
"🪴": 248457,
|
789 |
+
"🪵": 248877,
|
790 |
+
"🪶": 249151,
|
791 |
+
"🪷": 249036,
|
792 |
+
"🪸": 248492,
|
793 |
+
"🪹": 248592,
|
794 |
+
"🪺": 248646,
|
795 |
+
"🪻": 248898,
|
796 |
+
"🪼": 248952,
|
797 |
+
"🪽": 249136,
|
798 |
+
"🪿": 249071,
|
799 |
+
"🫀": 248442,
|
800 |
+
"🫁": 248449,
|
801 |
+
"🫂": 248606,
|
802 |
+
"🫃": 248637,
|
803 |
+
"🫄": 248658,
|
804 |
+
"🫅": 248846,
|
805 |
+
"🫎": 248894,
|
806 |
+
"🫏": 248826,
|
807 |
+
"🫐": 248500,
|
808 |
+
"🫑": 248456,
|
809 |
+
"🫒": 248703,
|
810 |
+
"🫓": 248743,
|
811 |
+
"🫔": 249126,
|
812 |
+
"🫕": 249174,
|
813 |
+
"🫖": 248724,
|
814 |
+
"🫗": 248652,
|
815 |
+
"🫘": 249093,
|
816 |
+
"🫙": 249054,
|
817 |
+
"🫚": 248800,
|
818 |
+
"🫛": 248698,
|
819 |
+
"🫠": 249247,
|
820 |
+
"🫡": 248968,
|
821 |
+
"🫢": 248783,
|
822 |
+
"🫣": 248481,
|
823 |
+
"🫤": 248986,
|
824 |
+
"🫥": 249043,
|
825 |
+
"🫦": 248526,
|
826 |
+
"🫧": 248548,
|
827 |
+
"🫨": 248747,
|
828 |
+
"🫰": 249012,
|
829 |
+
"🫱": 249090,
|
830 |
+
"🫲": 248924,
|
831 |
+
"🫳": 249228,
|
832 |
+
"🫴": 248987,
|
833 |
+
"🫵": 248531,
|
834 |
+
"🫶": 249157,
|
835 |
+
"🫷": 249091,
|
836 |
+
"🫸": 248870
|
837 |
+
}
|
config.json
ADDED
@@ -0,0 +1,40 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_name_or_path": "clicknext/phayathaibert",
|
3 |
+
"architectures": [
|
4 |
+
"CamembertForSequenceClassification"
|
5 |
+
],
|
6 |
+
"attention_probs_dropout_prob": 0.1,
|
7 |
+
"bos_token_id": 0,
|
8 |
+
"classifier_dropout": null,
|
9 |
+
"eos_token_id": 2,
|
10 |
+
"hidden_act": "gelu",
|
11 |
+
"hidden_dropout_prob": 0.1,
|
12 |
+
"hidden_size": 768,
|
13 |
+
"id2label": {
|
14 |
+
"0": "LABEL_0",
|
15 |
+
"1": "LABEL_1",
|
16 |
+
"2": "LABEL_2",
|
17 |
+
"3": "LABEL_3"
|
18 |
+
},
|
19 |
+
"initializer_range": 0.02,
|
20 |
+
"intermediate_size": 3072,
|
21 |
+
"label2id": {
|
22 |
+
"LABEL_0": 0,
|
23 |
+
"LABEL_1": 1,
|
24 |
+
"LABEL_2": 2,
|
25 |
+
"LABEL_3": 3
|
26 |
+
},
|
27 |
+
"layer_norm_eps": 1e-12,
|
28 |
+
"max_position_embeddings": 512,
|
29 |
+
"model_type": "camembert",
|
30 |
+
"num_attention_heads": 12,
|
31 |
+
"num_hidden_layers": 12,
|
32 |
+
"pad_token_id": 1,
|
33 |
+
"position_embedding_type": "absolute",
|
34 |
+
"problem_type": "single_label_classification",
|
35 |
+
"torch_dtype": "float32",
|
36 |
+
"transformers_version": "4.40.1",
|
37 |
+
"type_vocab_size": 1,
|
38 |
+
"use_cache": true,
|
39 |
+
"vocab_size": 249262
|
40 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:aaf08ea07d6399123e24ccea22f9a6192783c65aff174f8e2135aedd03db6c52
|
3 |
+
size 1109931736
|
optimizer.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:46272d55849f78c8946e719105ad77f106ab150686b803da3ac005850cdd8676
|
3 |
+
size 2219983098
|
rng_state.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e0756100fa218e165a9a20bec4d70d745156140c6e5abfd5647935cea6c5bcb5
|
3 |
+
size 14244
|
scheduler.pt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9d432300d416e7d574db386599aa107407f0bd61bdd23f62c7d90a6b7c4ff1d1
|
3 |
+
size 1064
|
sentencepiece.bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e295c936bc0d8b6669ae769a2f8a0363e6d3abcfd8d0869134aa1e903a447d26
|
3 |
+
size 5261686
|
special_tokens_map.json
ADDED
@@ -0,0 +1,56 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"additional_special_tokens": [
|
3 |
+
"<s>NOTUSED",
|
4 |
+
"</s>NOTUSED",
|
5 |
+
"<_>"
|
6 |
+
],
|
7 |
+
"bos_token": {
|
8 |
+
"content": "<s>",
|
9 |
+
"lstrip": false,
|
10 |
+
"normalized": false,
|
11 |
+
"rstrip": false,
|
12 |
+
"single_word": false
|
13 |
+
},
|
14 |
+
"cls_token": {
|
15 |
+
"content": "<s>",
|
16 |
+
"lstrip": false,
|
17 |
+
"normalized": false,
|
18 |
+
"rstrip": false,
|
19 |
+
"single_word": false
|
20 |
+
},
|
21 |
+
"eos_token": {
|
22 |
+
"content": "</s>",
|
23 |
+
"lstrip": false,
|
24 |
+
"normalized": false,
|
25 |
+
"rstrip": false,
|
26 |
+
"single_word": false
|
27 |
+
},
|
28 |
+
"mask_token": {
|
29 |
+
"content": "<mask>",
|
30 |
+
"lstrip": true,
|
31 |
+
"normalized": true,
|
32 |
+
"rstrip": false,
|
33 |
+
"single_word": false
|
34 |
+
},
|
35 |
+
"pad_token": {
|
36 |
+
"content": "<pad>",
|
37 |
+
"lstrip": false,
|
38 |
+
"normalized": false,
|
39 |
+
"rstrip": false,
|
40 |
+
"single_word": false
|
41 |
+
},
|
42 |
+
"sep_token": {
|
43 |
+
"content": "</s>",
|
44 |
+
"lstrip": false,
|
45 |
+
"normalized": false,
|
46 |
+
"rstrip": false,
|
47 |
+
"single_word": false
|
48 |
+
},
|
49 |
+
"unk_token": {
|
50 |
+
"content": "<unk>",
|
51 |
+
"lstrip": false,
|
52 |
+
"normalized": false,
|
53 |
+
"rstrip": false,
|
54 |
+
"single_word": false
|
55 |
+
}
|
56 |
+
}
|
tokenizer.json
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6bd9e947fbe9c970a202ea2a4dd511892b6b239078cf5919690e58d35a43e3f2
|
3 |
+
size 17349635
|
tokenizer_config.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
trainer_state.json
ADDED
@@ -0,0 +1,1429 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"best_metric": 0.762063227953411,
|
3 |
+
"best_model_checkpoint": "finetuned_models/wisesight_sentiment/checkpoint-2400",
|
4 |
+
"epoch": 5.991124260355029,
|
5 |
+
"eval_steps": 100,
|
6 |
+
"global_step": 8100,
|
7 |
+
"is_hyper_param_search": false,
|
8 |
+
"is_local_process_zero": true,
|
9 |
+
"is_world_process_zero": true,
|
10 |
+
"log_history": [
|
11 |
+
{
|
12 |
+
"epoch": 0.07396449704142012,
|
13 |
+
"eval_class_f1": {
|
14 |
+
"neg": 0.018604651162790697,
|
15 |
+
"neu": 0.6997558991049634,
|
16 |
+
"pos": 0.0,
|
17 |
+
"q": 0.0
|
18 |
+
},
|
19 |
+
"eval_loss": 1.0810712575912476,
|
20 |
+
"eval_macro_average_f1": 0.17959013756693853,
|
21 |
+
"eval_micro_average_f1": 0.5391014975041597,
|
22 |
+
"eval_runtime": 6.8397,
|
23 |
+
"eval_samples_per_second": 351.476,
|
24 |
+
"eval_steps_per_second": 22.077,
|
25 |
+
"step": 100
|
26 |
+
},
|
27 |
+
{
|
28 |
+
"epoch": 0.14792899408284024,
|
29 |
+
"eval_class_f1": {
|
30 |
+
"neg": 0.6223055295220243,
|
31 |
+
"neu": 0.7497702909647779,
|
32 |
+
"pos": 0.0,
|
33 |
+
"q": 0.0
|
34 |
+
},
|
35 |
+
"eval_loss": 0.8820463418960571,
|
36 |
+
"eval_macro_average_f1": 0.34301895512170055,
|
37 |
+
"eval_micro_average_f1": 0.6472545757071547,
|
38 |
+
"eval_runtime": 6.8712,
|
39 |
+
"eval_samples_per_second": 349.865,
|
40 |
+
"eval_steps_per_second": 21.976,
|
41 |
+
"step": 200
|
42 |
+
},
|
43 |
+
{
|
44 |
+
"epoch": 0.22189349112426035,
|
45 |
+
"eval_class_f1": {
|
46 |
+
"neg": 0.7588757396449703,
|
47 |
+
"neu": 0.7731384829505916,
|
48 |
+
"pos": 0.27037037037037037,
|
49 |
+
"q": 0.0
|
50 |
+
},
|
51 |
+
"eval_loss": 0.7263810038566589,
|
52 |
+
"eval_macro_average_f1": 0.4505961482414831,
|
53 |
+
"eval_micro_average_f1": 0.7059068219633944,
|
54 |
+
"eval_runtime": 7.0136,
|
55 |
+
"eval_samples_per_second": 342.761,
|
56 |
+
"eval_steps_per_second": 21.529,
|
57 |
+
"step": 300
|
58 |
+
},
|
59 |
+
{
|
60 |
+
"epoch": 0.2958579881656805,
|
61 |
+
"eval_class_f1": {
|
62 |
+
"neg": 0.7806637806637807,
|
63 |
+
"neu": 0.760541586073501,
|
64 |
+
"pos": 0.501891551071879,
|
65 |
+
"q": 0.0909090909090909
|
66 |
+
},
|
67 |
+
"eval_loss": 0.6896220445632935,
|
68 |
+
"eval_macro_average_f1": 0.5335015021795629,
|
69 |
+
"eval_micro_average_f1": 0.7175540765391015,
|
70 |
+
"eval_runtime": 7.0909,
|
71 |
+
"eval_samples_per_second": 339.026,
|
72 |
+
"eval_steps_per_second": 21.295,
|
73 |
+
"step": 400
|
74 |
+
},
|
75 |
+
{
|
76 |
+
"epoch": 0.3698224852071006,
|
77 |
+
"grad_norm": 5.410265922546387,
|
78 |
+
"learning_rate": 1.828817733990148e-05,
|
79 |
+
"loss": 0.8994,
|
80 |
+
"step": 500
|
81 |
+
},
|
82 |
+
{
|
83 |
+
"epoch": 0.3698224852071006,
|
84 |
+
"eval_class_f1": {
|
85 |
+
"neg": 0.7767988252569751,
|
86 |
+
"neu": 0.7759882869692534,
|
87 |
+
"pos": 0.41987179487179493,
|
88 |
+
"q": 0.3777777777777778
|
89 |
+
},
|
90 |
+
"eval_loss": 0.6673027276992798,
|
91 |
+
"eval_macro_average_f1": 0.5876091712189503,
|
92 |
+
"eval_micro_average_f1": 0.7225457570715474,
|
93 |
+
"eval_runtime": 7.071,
|
94 |
+
"eval_samples_per_second": 339.978,
|
95 |
+
"eval_steps_per_second": 21.355,
|
96 |
+
"step": 500
|
97 |
+
},
|
98 |
+
{
|
99 |
+
"epoch": 0.4437869822485207,
|
100 |
+
"eval_class_f1": {
|
101 |
+
"neg": 0.7704042715484364,
|
102 |
+
"neu": 0.7415287628053587,
|
103 |
+
"pos": 0.5426356589147286,
|
104 |
+
"q": 0.14285714285714285
|
105 |
+
},
|
106 |
+
"eval_loss": 0.6752218008041382,
|
107 |
+
"eval_macro_average_f1": 0.5493564590314166,
|
108 |
+
"eval_micro_average_f1": 0.7050748752079867,
|
109 |
+
"eval_runtime": 7.2386,
|
110 |
+
"eval_samples_per_second": 332.107,
|
111 |
+
"eval_steps_per_second": 20.86,
|
112 |
+
"step": 600
|
113 |
+
},
|
114 |
+
{
|
115 |
+
"epoch": 0.5177514792899408,
|
116 |
+
"eval_class_f1": {
|
117 |
+
"neg": 0.7780979827089337,
|
118 |
+
"neu": 0.7535296490520371,
|
119 |
+
"pos": 0.5532435740514076,
|
120 |
+
"q": 0.3870967741935484
|
121 |
+
},
|
122 |
+
"eval_loss": 0.667128324508667,
|
123 |
+
"eval_macro_average_f1": 0.6179919950014817,
|
124 |
+
"eval_micro_average_f1": 0.7171381031613977,
|
125 |
+
"eval_runtime": 7.3636,
|
126 |
+
"eval_samples_per_second": 326.472,
|
127 |
+
"eval_steps_per_second": 20.506,
|
128 |
+
"step": 700
|
129 |
+
},
|
130 |
+
{
|
131 |
+
"epoch": 0.591715976331361,
|
132 |
+
"eval_class_f1": {
|
133 |
+
"neg": 0.7555923777961889,
|
134 |
+
"neu": 0.7529501332318232,
|
135 |
+
"pos": 0.5357575757575757,
|
136 |
+
"q": 0.4161073825503356
|
137 |
+
},
|
138 |
+
"eval_loss": 0.662220299243927,
|
139 |
+
"eval_macro_average_f1": 0.6151018673339809,
|
140 |
+
"eval_micro_average_f1": 0.7059068219633944,
|
141 |
+
"eval_runtime": 7.3848,
|
142 |
+
"eval_samples_per_second": 325.535,
|
143 |
+
"eval_steps_per_second": 20.448,
|
144 |
+
"step": 800
|
145 |
+
},
|
146 |
+
{
|
147 |
+
"epoch": 0.665680473372781,
|
148 |
+
"eval_class_f1": {
|
149 |
+
"neg": 0.7756714060031595,
|
150 |
+
"neu": 0.7722698471859858,
|
151 |
+
"pos": 0.5253807106598984,
|
152 |
+
"q": 0.2535211267605634
|
153 |
+
},
|
154 |
+
"eval_loss": 0.6270455121994019,
|
155 |
+
"eval_macro_average_f1": 0.5817107726524018,
|
156 |
+
"eval_micro_average_f1": 0.7250415973377704,
|
157 |
+
"eval_runtime": 7.3625,
|
158 |
+
"eval_samples_per_second": 326.52,
|
159 |
+
"eval_steps_per_second": 20.509,
|
160 |
+
"step": 900
|
161 |
+
},
|
162 |
+
{
|
163 |
+
"epoch": 0.7396449704142012,
|
164 |
+
"grad_norm": 7.580224514007568,
|
165 |
+
"learning_rate": 2.9252054794520548e-05,
|
166 |
+
"loss": 0.6495,
|
167 |
+
"step": 1000
|
168 |
+
},
|
169 |
+
{
|
170 |
+
"epoch": 0.7396449704142012,
|
171 |
+
"eval_class_f1": {
|
172 |
+
"neg": 0.7862993298585256,
|
173 |
+
"neu": 0.7885968159940763,
|
174 |
+
"pos": 0.5219858156028369,
|
175 |
+
"q": 0.30508474576271183
|
176 |
+
},
|
177 |
+
"eval_loss": 0.6415818929672241,
|
178 |
+
"eval_macro_average_f1": 0.6004916768045376,
|
179 |
+
"eval_micro_average_f1": 0.742928452579035,
|
180 |
+
"eval_runtime": 7.3324,
|
181 |
+
"eval_samples_per_second": 327.861,
|
182 |
+
"eval_steps_per_second": 20.594,
|
183 |
+
"step": 1000
|
184 |
+
},
|
185 |
+
{
|
186 |
+
"epoch": 0.8136094674556213,
|
187 |
+
"eval_class_f1": {
|
188 |
+
"neg": 0.7824267782426777,
|
189 |
+
"neu": 0.7750972762645915,
|
190 |
+
"pos": 0.550531914893617,
|
191 |
+
"q": 0.1923076923076923
|
192 |
+
},
|
193 |
+
"eval_loss": 0.6599770188331604,
|
194 |
+
"eval_macro_average_f1": 0.5750909154271446,
|
195 |
+
"eval_micro_average_f1": 0.7358569051580699,
|
196 |
+
"eval_runtime": 7.4168,
|
197 |
+
"eval_samples_per_second": 324.129,
|
198 |
+
"eval_steps_per_second": 20.359,
|
199 |
+
"step": 1100
|
200 |
+
},
|
201 |
+
{
|
202 |
+
"epoch": 0.8875739644970414,
|
203 |
+
"eval_class_f1": {
|
204 |
+
"neg": 0.7531806615776081,
|
205 |
+
"neu": 0.7700414000752727,
|
206 |
+
"pos": 0.5685164212910533,
|
207 |
+
"q": 0.35955056179775274
|
208 |
+
},
|
209 |
+
"eval_loss": 0.6348879933357239,
|
210 |
+
"eval_macro_average_f1": 0.6128222611854217,
|
211 |
+
"eval_micro_average_f1": 0.721297836938436,
|
212 |
+
"eval_runtime": 7.4408,
|
213 |
+
"eval_samples_per_second": 323.084,
|
214 |
+
"eval_steps_per_second": 20.294,
|
215 |
+
"step": 1200
|
216 |
+
},
|
217 |
+
{
|
218 |
+
"epoch": 0.9615384615384616,
|
219 |
+
"eval_class_f1": {
|
220 |
+
"neg": 0.7920646583394563,
|
221 |
+
"neu": 0.783076923076923,
|
222 |
+
"pos": 0.535014005602241,
|
223 |
+
"q": 0.46616541353383456
|
224 |
+
},
|
225 |
+
"eval_loss": 0.6110679507255554,
|
226 |
+
"eval_macro_average_f1": 0.6440802501381137,
|
227 |
+
"eval_micro_average_f1": 0.7400166389351082,
|
228 |
+
"eval_runtime": 7.4775,
|
229 |
+
"eval_samples_per_second": 321.498,
|
230 |
+
"eval_steps_per_second": 20.194,
|
231 |
+
"step": 1300
|
232 |
+
},
|
233 |
+
{
|
234 |
+
"epoch": 1.0355029585798816,
|
235 |
+
"eval_class_f1": {
|
236 |
+
"neg": 0.8015094339622642,
|
237 |
+
"neu": 0.8065934065934067,
|
238 |
+
"pos": 0.5252225519287834,
|
239 |
+
"q": 0.379746835443038
|
240 |
+
},
|
241 |
+
"eval_loss": 0.6416576504707336,
|
242 |
+
"eval_macro_average_f1": 0.628268056981873,
|
243 |
+
"eval_micro_average_f1": 0.7587354409317804,
|
244 |
+
"eval_runtime": 7.358,
|
245 |
+
"eval_samples_per_second": 326.721,
|
246 |
+
"eval_steps_per_second": 20.522,
|
247 |
+
"step": 1400
|
248 |
+
},
|
249 |
+
{
|
250 |
+
"epoch": 1.1094674556213018,
|
251 |
+
"grad_norm": 3.8226146697998047,
|
252 |
+
"learning_rate": 2.72013698630137e-05,
|
253 |
+
"loss": 0.6084,
|
254 |
+
"step": 1500
|
255 |
+
},
|
256 |
+
{
|
257 |
+
"epoch": 1.1094674556213018,
|
258 |
+
"eval_class_f1": {
|
259 |
+
"neg": 0.7846277021617293,
|
260 |
+
"neu": 0.8024917552216929,
|
261 |
+
"pos": 0.5813953488372093,
|
262 |
+
"q": 0.2857142857142857
|
263 |
+
},
|
264 |
+
"eval_loss": 0.6498740315437317,
|
265 |
+
"eval_macro_average_f1": 0.6135572729837293,
|
266 |
+
"eval_micro_average_f1": 0.7562396006655574,
|
267 |
+
"eval_runtime": 7.2975,
|
268 |
+
"eval_samples_per_second": 329.428,
|
269 |
+
"eval_steps_per_second": 20.692,
|
270 |
+
"step": 1500
|
271 |
+
},
|
272 |
+
{
|
273 |
+
"epoch": 1.183431952662722,
|
274 |
+
"eval_class_f1": {
|
275 |
+
"neg": 0.7917525773195877,
|
276 |
+
"neu": 0.7863247863247863,
|
277 |
+
"pos": 0.5444126074498568,
|
278 |
+
"q": 0.345679012345679
|
279 |
+
},
|
280 |
+
"eval_loss": 0.685055673122406,
|
281 |
+
"eval_macro_average_f1": 0.6170422458599774,
|
282 |
+
"eval_micro_average_f1": 0.7454242928452579,
|
283 |
+
"eval_runtime": 7.1996,
|
284 |
+
"eval_samples_per_second": 333.908,
|
285 |
+
"eval_steps_per_second": 20.973,
|
286 |
+
"step": 1600
|
287 |
+
},
|
288 |
+
{
|
289 |
+
"epoch": 1.2573964497041419,
|
290 |
+
"eval_class_f1": {
|
291 |
+
"neg": 0.7966231772831925,
|
292 |
+
"neu": 0.7783018867924529,
|
293 |
+
"pos": 0.5695216907675196,
|
294 |
+
"q": 0.25806451612903225
|
295 |
+
},
|
296 |
+
"eval_loss": 0.6685267090797424,
|
297 |
+
"eval_macro_average_f1": 0.6006278177430493,
|
298 |
+
"eval_micro_average_f1": 0.7375207986688852,
|
299 |
+
"eval_runtime": 7.2849,
|
300 |
+
"eval_samples_per_second": 329.999,
|
301 |
+
"eval_steps_per_second": 20.728,
|
302 |
+
"step": 1700
|
303 |
+
},
|
304 |
+
{
|
305 |
+
"epoch": 1.331360946745562,
|
306 |
+
"eval_class_f1": {
|
307 |
+
"neg": 0.8018362662586075,
|
308 |
+
"neu": 0.7977570093457944,
|
309 |
+
"pos": 0.5578947368421052,
|
310 |
+
"q": 0.36363636363636365
|
311 |
+
},
|
312 |
+
"eval_loss": 0.6347253918647766,
|
313 |
+
"eval_macro_average_f1": 0.6302810940207177,
|
314 |
+
"eval_micro_average_f1": 0.7549916805324459,
|
315 |
+
"eval_runtime": 7.1263,
|
316 |
+
"eval_samples_per_second": 337.343,
|
317 |
+
"eval_steps_per_second": 21.189,
|
318 |
+
"step": 1800
|
319 |
+
},
|
320 |
+
{
|
321 |
+
"epoch": 1.4053254437869822,
|
322 |
+
"eval_class_f1": {
|
323 |
+
"neg": 0.7917329093799682,
|
324 |
+
"neu": 0.7925512104283055,
|
325 |
+
"pos": 0.5742821473158551,
|
326 |
+
"q": 0.28125
|
327 |
+
},
|
328 |
+
"eval_loss": 0.6284430027008057,
|
329 |
+
"eval_macro_average_f1": 0.6099540667810323,
|
330 |
+
"eval_micro_average_f1": 0.7491680532445923,
|
331 |
+
"eval_runtime": 7.2447,
|
332 |
+
"eval_samples_per_second": 331.831,
|
333 |
+
"eval_steps_per_second": 20.843,
|
334 |
+
"step": 1900
|
335 |
+
},
|
336 |
+
{
|
337 |
+
"epoch": 1.4792899408284024,
|
338 |
+
"grad_norm": 4.046507835388184,
|
339 |
+
"learning_rate": 2.5146575342465757e-05,
|
340 |
+
"loss": 0.5135,
|
341 |
+
"step": 2000
|
342 |
+
},
|
343 |
+
{
|
344 |
+
"epoch": 1.4792899408284024,
|
345 |
+
"eval_class_f1": {
|
346 |
+
"neg": 0.7920792079207921,
|
347 |
+
"neu": 0.7867370007535796,
|
348 |
+
"pos": 0.5517241379310345,
|
349 |
+
"q": 0.27586206896551724
|
350 |
+
},
|
351 |
+
"eval_loss": 0.6431812644004822,
|
352 |
+
"eval_macro_average_f1": 0.601600603892731,
|
353 |
+
"eval_micro_average_f1": 0.7437603993344426,
|
354 |
+
"eval_runtime": 7.2356,
|
355 |
+
"eval_samples_per_second": 332.247,
|
356 |
+
"eval_steps_per_second": 20.869,
|
357 |
+
"step": 2000
|
358 |
+
},
|
359 |
+
{
|
360 |
+
"epoch": 1.5532544378698225,
|
361 |
+
"eval_class_f1": {
|
362 |
+
"neg": 0.7887550200803214,
|
363 |
+
"neu": 0.7950581395348836,
|
364 |
+
"pos": 0.555407209612817,
|
365 |
+
"q": 0.29032258064516125
|
366 |
+
},
|
367 |
+
"eval_loss": 0.6327183842658997,
|
368 |
+
"eval_macro_average_f1": 0.6073857374682958,
|
369 |
+
"eval_micro_average_f1": 0.7495840266222962,
|
370 |
+
"eval_runtime": 7.3333,
|
371 |
+
"eval_samples_per_second": 327.82,
|
372 |
+
"eval_steps_per_second": 20.591,
|
373 |
+
"step": 2100
|
374 |
+
},
|
375 |
+
{
|
376 |
+
"epoch": 1.6272189349112427,
|
377 |
+
"eval_class_f1": {
|
378 |
+
"neg": 0.7658119658119659,
|
379 |
+
"neu": 0.8002847988608045,
|
380 |
+
"pos": 0.56951871657754,
|
381 |
+
"q": 0.4197530864197531
|
382 |
+
},
|
383 |
+
"eval_loss": 0.6534045338630676,
|
384 |
+
"eval_macro_average_f1": 0.6388421419175159,
|
385 |
+
"eval_micro_average_f1": 0.7495840266222962,
|
386 |
+
"eval_runtime": 7.2578,
|
387 |
+
"eval_samples_per_second": 331.231,
|
388 |
+
"eval_steps_per_second": 20.805,
|
389 |
+
"step": 2200
|
390 |
+
},
|
391 |
+
{
|
392 |
+
"epoch": 1.7011834319526629,
|
393 |
+
"eval_class_f1": {
|
394 |
+
"neg": 0.7832369942196531,
|
395 |
+
"neu": 0.7650099403578529,
|
396 |
+
"pos": 0.5829268292682928,
|
397 |
+
"q": 0.35955056179775274
|
398 |
+
},
|
399 |
+
"eval_loss": 0.6581071019172668,
|
400 |
+
"eval_macro_average_f1": 0.6226810814108878,
|
401 |
+
"eval_micro_average_f1": 0.7316971713810316,
|
402 |
+
"eval_runtime": 7.1824,
|
403 |
+
"eval_samples_per_second": 334.708,
|
404 |
+
"eval_steps_per_second": 21.024,
|
405 |
+
"step": 2300
|
406 |
+
},
|
407 |
+
{
|
408 |
+
"epoch": 1.7751479289940828,
|
409 |
+
"eval_class_f1": {
|
410 |
+
"neg": 0.7951807228915662,
|
411 |
+
"neu": 0.8109843081312411,
|
412 |
+
"pos": 0.5441595441595442,
|
413 |
+
"q": 0.3157894736842105
|
414 |
+
},
|
415 |
+
"eval_loss": 0.6206311583518982,
|
416 |
+
"eval_macro_average_f1": 0.6165285122166405,
|
417 |
+
"eval_micro_average_f1": 0.762063227953411,
|
418 |
+
"eval_runtime": 7.2501,
|
419 |
+
"eval_samples_per_second": 331.583,
|
420 |
+
"eval_steps_per_second": 20.827,
|
421 |
+
"step": 2400
|
422 |
+
},
|
423 |
+
{
|
424 |
+
"epoch": 1.849112426035503,
|
425 |
+
"grad_norm": 6.195135593414307,
|
426 |
+
"learning_rate": 2.3091780821917807e-05,
|
427 |
+
"loss": 0.4995,
|
428 |
+
"step": 2500
|
429 |
+
},
|
430 |
+
{
|
431 |
+
"epoch": 1.849112426035503,
|
432 |
+
"eval_class_f1": {
|
433 |
+
"neg": 0.7932148626817447,
|
434 |
+
"neu": 0.8,
|
435 |
+
"pos": 0.5830164765525983,
|
436 |
+
"q": 0.196078431372549
|
437 |
+
},
|
438 |
+
"eval_loss": 0.6029447913169861,
|
439 |
+
"eval_macro_average_f1": 0.5930774426517229,
|
440 |
+
"eval_micro_average_f1": 0.7562396006655574,
|
441 |
+
"eval_runtime": 7.1935,
|
442 |
+
"eval_samples_per_second": 334.192,
|
443 |
+
"eval_steps_per_second": 20.991,
|
444 |
+
"step": 2500
|
445 |
+
},
|
446 |
+
{
|
447 |
+
"epoch": 1.9230769230769231,
|
448 |
+
"eval_class_f1": {
|
449 |
+
"neg": 0.8059236165237724,
|
450 |
+
"neu": 0.7899159663865546,
|
451 |
+
"pos": 0.579415501905972,
|
452 |
+
"q": 0.36666666666666664
|
453 |
+
},
|
454 |
+
"eval_loss": 0.6066814064979553,
|
455 |
+
"eval_macro_average_f1": 0.6354804378707414,
|
456 |
+
"eval_micro_average_f1": 0.7491680532445923,
|
457 |
+
"eval_runtime": 7.2817,
|
458 |
+
"eval_samples_per_second": 330.143,
|
459 |
+
"eval_steps_per_second": 20.737,
|
460 |
+
"step": 2600
|
461 |
+
},
|
462 |
+
{
|
463 |
+
"epoch": 1.997041420118343,
|
464 |
+
"eval_class_f1": {
|
465 |
+
"neg": 0.8003157063930545,
|
466 |
+
"neu": 0.797884397431054,
|
467 |
+
"pos": 0.5773447015834348,
|
468 |
+
"q": 0.3835616438356164
|
469 |
+
},
|
470 |
+
"eval_loss": 0.630171537399292,
|
471 |
+
"eval_macro_average_f1": 0.63977661231079,
|
472 |
+
"eval_micro_average_f1": 0.7545757071547421,
|
473 |
+
"eval_runtime": 7.2376,
|
474 |
+
"eval_samples_per_second": 332.156,
|
475 |
+
"eval_steps_per_second": 20.863,
|
476 |
+
"step": 2700
|
477 |
+
},
|
478 |
+
{
|
479 |
+
"epoch": 2.0710059171597632,
|
480 |
+
"eval_class_f1": {
|
481 |
+
"neg": 0.7848509266720386,
|
482 |
+
"neu": 0.7945103857566765,
|
483 |
+
"pos": 0.5853051058530511,
|
484 |
+
"q": 0.35294117647058826
|
485 |
+
},
|
486 |
+
"eval_loss": 0.7064331769943237,
|
487 |
+
"eval_macro_average_f1": 0.6294018986880886,
|
488 |
+
"eval_micro_average_f1": 0.7508319467554077,
|
489 |
+
"eval_runtime": 7.2934,
|
490 |
+
"eval_samples_per_second": 329.612,
|
491 |
+
"eval_steps_per_second": 20.704,
|
492 |
+
"step": 2800
|
493 |
+
},
|
494 |
+
{
|
495 |
+
"epoch": 2.1449704142011834,
|
496 |
+
"eval_class_f1": {
|
497 |
+
"neg": 0.797752808988764,
|
498 |
+
"neu": 0.8026412325752018,
|
499 |
+
"pos": 0.5824742268041238,
|
500 |
+
"q": 0.26666666666666666
|
501 |
+
},
|
502 |
+
"eval_loss": 0.7201129794120789,
|
503 |
+
"eval_macro_average_f1": 0.612383733758689,
|
504 |
+
"eval_micro_average_f1": 0.7591514143094842,
|
505 |
+
"eval_runtime": 7.1883,
|
506 |
+
"eval_samples_per_second": 334.433,
|
507 |
+
"eval_steps_per_second": 21.006,
|
508 |
+
"step": 2900
|
509 |
+
},
|
510 |
+
{
|
511 |
+
"epoch": 2.2189349112426036,
|
512 |
+
"grad_norm": 6.065237045288086,
|
513 |
+
"learning_rate": 2.1036986301369864e-05,
|
514 |
+
"loss": 0.4003,
|
515 |
+
"step": 3000
|
516 |
+
},
|
517 |
+
{
|
518 |
+
"epoch": 2.2189349112426036,
|
519 |
+
"eval_class_f1": {
|
520 |
+
"neg": 0.7861271676300579,
|
521 |
+
"neu": 0.800578034682081,
|
522 |
+
"pos": 0.5670391061452514,
|
523 |
+
"q": 0.3185840707964602
|
524 |
+
},
|
525 |
+
"eval_loss": 0.7178497910499573,
|
526 |
+
"eval_macro_average_f1": 0.6180820948134627,
|
527 |
+
"eval_micro_average_f1": 0.7508319467554077,
|
528 |
+
"eval_runtime": 7.2106,
|
529 |
+
"eval_samples_per_second": 333.398,
|
530 |
+
"eval_steps_per_second": 20.941,
|
531 |
+
"step": 3000
|
532 |
+
},
|
533 |
+
{
|
534 |
+
"epoch": 2.2928994082840237,
|
535 |
+
"eval_class_f1": {
|
536 |
+
"neg": 0.7955801104972374,
|
537 |
+
"neu": 0.781854043392505,
|
538 |
+
"pos": 0.5852585258525853,
|
539 |
+
"q": 0.28865979381443296
|
540 |
+
},
|
541 |
+
"eval_loss": 0.7727176547050476,
|
542 |
+
"eval_macro_average_f1": 0.6128381183891901,
|
543 |
+
"eval_micro_average_f1": 0.7383527454242929,
|
544 |
+
"eval_runtime": 7.2299,
|
545 |
+
"eval_samples_per_second": 332.51,
|
546 |
+
"eval_steps_per_second": 20.886,
|
547 |
+
"step": 3100
|
548 |
+
},
|
549 |
+
{
|
550 |
+
"epoch": 2.366863905325444,
|
551 |
+
"eval_class_f1": {
|
552 |
+
"neg": 0.7893462469733656,
|
553 |
+
"neu": 0.788983997022702,
|
554 |
+
"pos": 0.5606060606060607,
|
555 |
+
"q": 0.2888888888888889
|
556 |
+
},
|
557 |
+
"eval_loss": 0.7219040393829346,
|
558 |
+
"eval_macro_average_f1": 0.6069562983727543,
|
559 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
560 |
+
"eval_runtime": 7.2669,
|
561 |
+
"eval_samples_per_second": 330.815,
|
562 |
+
"eval_steps_per_second": 20.779,
|
563 |
+
"step": 3200
|
564 |
+
},
|
565 |
+
{
|
566 |
+
"epoch": 2.440828402366864,
|
567 |
+
"eval_class_f1": {
|
568 |
+
"neg": 0.8073115003808072,
|
569 |
+
"neu": 0.7814829344841114,
|
570 |
+
"pos": 0.5855338691159586,
|
571 |
+
"q": 0.26666666666666666
|
572 |
+
},
|
573 |
+
"eval_loss": 0.7229210734367371,
|
574 |
+
"eval_macro_average_f1": 0.610248742661886,
|
575 |
+
"eval_micro_average_f1": 0.7450083194675541,
|
576 |
+
"eval_runtime": 7.1283,
|
577 |
+
"eval_samples_per_second": 337.248,
|
578 |
+
"eval_steps_per_second": 21.183,
|
579 |
+
"step": 3300
|
580 |
+
},
|
581 |
+
{
|
582 |
+
"epoch": 2.5147928994082838,
|
583 |
+
"eval_class_f1": {
|
584 |
+
"neg": 0.7984790874524715,
|
585 |
+
"neu": 0.7856049004594182,
|
586 |
+
"pos": 0.5773447015834348,
|
587 |
+
"q": 0.3
|
588 |
+
},
|
589 |
+
"eval_loss": 0.7037935853004456,
|
590 |
+
"eval_macro_average_f1": 0.615357172373831,
|
591 |
+
"eval_micro_average_f1": 0.747504159733777,
|
592 |
+
"eval_runtime": 7.2219,
|
593 |
+
"eval_samples_per_second": 332.876,
|
594 |
+
"eval_steps_per_second": 20.909,
|
595 |
+
"step": 3400
|
596 |
+
},
|
597 |
+
{
|
598 |
+
"epoch": 2.5887573964497044,
|
599 |
+
"grad_norm": 3.8475677967071533,
|
600 |
+
"learning_rate": 1.8982191780821918e-05,
|
601 |
+
"loss": 0.3579,
|
602 |
+
"step": 3500
|
603 |
+
},
|
604 |
+
{
|
605 |
+
"epoch": 2.5887573964497044,
|
606 |
+
"eval_class_f1": {
|
607 |
+
"neg": 0.7871815940838127,
|
608 |
+
"neu": 0.7871305649083427,
|
609 |
+
"pos": 0.5738916256157636,
|
610 |
+
"q": 0.37735849056603776
|
611 |
+
},
|
612 |
+
"eval_loss": 0.7569752931594849,
|
613 |
+
"eval_macro_average_f1": 0.6313905687934891,
|
614 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
615 |
+
"eval_runtime": 7.3391,
|
616 |
+
"eval_samples_per_second": 327.56,
|
617 |
+
"eval_steps_per_second": 20.575,
|
618 |
+
"step": 3500
|
619 |
+
},
|
620 |
+
{
|
621 |
+
"epoch": 2.662721893491124,
|
622 |
+
"eval_class_f1": {
|
623 |
+
"neg": 0.8064269319051262,
|
624 |
+
"neu": 0.7905718701700155,
|
625 |
+
"pos": 0.5779927448609432,
|
626 |
+
"q": 0.3255813953488372
|
627 |
+
},
|
628 |
+
"eval_loss": 0.7201011180877686,
|
629 |
+
"eval_macro_average_f1": 0.6251432355712305,
|
630 |
+
"eval_micro_average_f1": 0.75,
|
631 |
+
"eval_runtime": 7.2188,
|
632 |
+
"eval_samples_per_second": 333.02,
|
633 |
+
"eval_steps_per_second": 20.918,
|
634 |
+
"step": 3600
|
635 |
+
},
|
636 |
+
{
|
637 |
+
"epoch": 2.7366863905325443,
|
638 |
+
"eval_class_f1": {
|
639 |
+
"neg": 0.7847896440129449,
|
640 |
+
"neu": 0.7701911822083495,
|
641 |
+
"pos": 0.5797413793103448,
|
642 |
+
"q": 0.3703703703703704
|
643 |
+
},
|
644 |
+
"eval_loss": 0.7302864789962769,
|
645 |
+
"eval_macro_average_f1": 0.6262731439755023,
|
646 |
+
"eval_micro_average_f1": 0.7304492512479202,
|
647 |
+
"eval_runtime": 7.2541,
|
648 |
+
"eval_samples_per_second": 331.401,
|
649 |
+
"eval_steps_per_second": 20.816,
|
650 |
+
"step": 3700
|
651 |
+
},
|
652 |
+
{
|
653 |
+
"epoch": 2.8106508875739644,
|
654 |
+
"eval_class_f1": {
|
655 |
+
"neg": 0.7971698113207547,
|
656 |
+
"neu": 0.8014842300556586,
|
657 |
+
"pos": 0.5839793281653747,
|
658 |
+
"q": 0.3283582089552239
|
659 |
+
},
|
660 |
+
"eval_loss": 0.7112248539924622,
|
661 |
+
"eval_macro_average_f1": 0.627747894624253,
|
662 |
+
"eval_micro_average_f1": 0.7587354409317804,
|
663 |
+
"eval_runtime": 7.2905,
|
664 |
+
"eval_samples_per_second": 329.743,
|
665 |
+
"eval_steps_per_second": 20.712,
|
666 |
+
"step": 3800
|
667 |
+
},
|
668 |
+
{
|
669 |
+
"epoch": 2.8846153846153846,
|
670 |
+
"eval_class_f1": {
|
671 |
+
"neg": 0.7999999999999999,
|
672 |
+
"neu": 0.7944066515495087,
|
673 |
+
"pos": 0.5961995249406176,
|
674 |
+
"q": 0.2545454545454545
|
675 |
+
},
|
676 |
+
"eval_loss": 0.7105884552001953,
|
677 |
+
"eval_macro_average_f1": 0.6112879077588952,
|
678 |
+
"eval_micro_average_f1": 0.7549916805324459,
|
679 |
+
"eval_runtime": 7.4167,
|
680 |
+
"eval_samples_per_second": 324.132,
|
681 |
+
"eval_steps_per_second": 20.359,
|
682 |
+
"step": 3900
|
683 |
+
},
|
684 |
+
{
|
685 |
+
"epoch": 2.9585798816568047,
|
686 |
+
"grad_norm": 8.97050666809082,
|
687 |
+
"learning_rate": 1.6927397260273975e-05,
|
688 |
+
"loss": 0.3409,
|
689 |
+
"step": 4000
|
690 |
+
},
|
691 |
+
{
|
692 |
+
"epoch": 2.9585798816568047,
|
693 |
+
"eval_class_f1": {
|
694 |
+
"neg": 0.803088803088803,
|
695 |
+
"neu": 0.7901328273244782,
|
696 |
+
"pos": 0.5671641791044775,
|
697 |
+
"q": 0.3513513513513513
|
698 |
+
},
|
699 |
+
"eval_loss": 0.7364293932914734,
|
700 |
+
"eval_macro_average_f1": 0.6279342902172774,
|
701 |
+
"eval_micro_average_f1": 0.7495840266222962,
|
702 |
+
"eval_runtime": 7.2488,
|
703 |
+
"eval_samples_per_second": 331.641,
|
704 |
+
"eval_steps_per_second": 20.831,
|
705 |
+
"step": 4000
|
706 |
+
},
|
707 |
+
{
|
708 |
+
"epoch": 3.032544378698225,
|
709 |
+
"eval_class_f1": {
|
710 |
+
"neg": 0.7924836601307189,
|
711 |
+
"neu": 0.7892777364110202,
|
712 |
+
"pos": 0.5696969696969698,
|
713 |
+
"q": 0.3287671232876712
|
714 |
+
},
|
715 |
+
"eval_loss": 0.8425710201263428,
|
716 |
+
"eval_macro_average_f1": 0.6200563723815951,
|
717 |
+
"eval_micro_average_f1": 0.7454242928452579,
|
718 |
+
"eval_runtime": 7.1671,
|
719 |
+
"eval_samples_per_second": 335.422,
|
720 |
+
"eval_steps_per_second": 21.068,
|
721 |
+
"step": 4100
|
722 |
+
},
|
723 |
+
{
|
724 |
+
"epoch": 3.106508875739645,
|
725 |
+
"eval_class_f1": {
|
726 |
+
"neg": 0.7883817427385893,
|
727 |
+
"neu": 0.7684537684537684,
|
728 |
+
"pos": 0.5720338983050848,
|
729 |
+
"q": 0.35294117647058826
|
730 |
+
},
|
731 |
+
"eval_loss": 0.9264113306999207,
|
732 |
+
"eval_macro_average_f1": 0.6204526464920077,
|
733 |
+
"eval_micro_average_f1": 0.7275374376039934,
|
734 |
+
"eval_runtime": 7.2924,
|
735 |
+
"eval_samples_per_second": 329.656,
|
736 |
+
"eval_steps_per_second": 20.706,
|
737 |
+
"step": 4200
|
738 |
+
},
|
739 |
+
{
|
740 |
+
"epoch": 3.1804733727810652,
|
741 |
+
"eval_class_f1": {
|
742 |
+
"neg": 0.8064269319051262,
|
743 |
+
"neu": 0.7787333854573885,
|
744 |
+
"pos": 0.5774647887323944,
|
745 |
+
"q": 0.32967032967032966
|
746 |
+
},
|
747 |
+
"eval_loss": 0.9222328662872314,
|
748 |
+
"eval_macro_average_f1": 0.6230738589413097,
|
749 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
750 |
+
"eval_runtime": 7.2012,
|
751 |
+
"eval_samples_per_second": 333.833,
|
752 |
+
"eval_steps_per_second": 20.969,
|
753 |
+
"step": 4300
|
754 |
+
},
|
755 |
+
{
|
756 |
+
"epoch": 3.2544378698224854,
|
757 |
+
"eval_class_f1": {
|
758 |
+
"neg": 0.7999999999999999,
|
759 |
+
"neu": 0.7803557617942769,
|
760 |
+
"pos": 0.5765124555160142,
|
761 |
+
"q": 0.35955056179775274
|
762 |
+
},
|
763 |
+
"eval_loss": 0.9496058821678162,
|
764 |
+
"eval_macro_average_f1": 0.6291046947770109,
|
765 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
766 |
+
"eval_runtime": 7.3334,
|
767 |
+
"eval_samples_per_second": 327.814,
|
768 |
+
"eval_steps_per_second": 20.591,
|
769 |
+
"step": 4400
|
770 |
+
},
|
771 |
+
{
|
772 |
+
"epoch": 3.328402366863905,
|
773 |
+
"grad_norm": 12.435276985168457,
|
774 |
+
"learning_rate": 1.4872602739726027e-05,
|
775 |
+
"loss": 0.2249,
|
776 |
+
"step": 4500
|
777 |
+
},
|
778 |
+
{
|
779 |
+
"epoch": 3.328402366863905,
|
780 |
+
"eval_class_f1": {
|
781 |
+
"neg": 0.8012718600953895,
|
782 |
+
"neu": 0.784238714613619,
|
783 |
+
"pos": 0.5663082437275986,
|
784 |
+
"q": 0.32323232323232326
|
785 |
+
},
|
786 |
+
"eval_loss": 0.9026820063591003,
|
787 |
+
"eval_macro_average_f1": 0.6187627854172325,
|
788 |
+
"eval_micro_average_f1": 0.7412645590682196,
|
789 |
+
"eval_runtime": 7.1404,
|
790 |
+
"eval_samples_per_second": 336.677,
|
791 |
+
"eval_steps_per_second": 21.147,
|
792 |
+
"step": 4500
|
793 |
+
},
|
794 |
+
{
|
795 |
+
"epoch": 3.4023668639053253,
|
796 |
+
"eval_class_f1": {
|
797 |
+
"neg": 0.8043647700701482,
|
798 |
+
"neu": 0.7884322678843227,
|
799 |
+
"pos": 0.5676328502415459,
|
800 |
+
"q": 0.2898550724637681
|
801 |
+
},
|
802 |
+
"eval_loss": 0.943065345287323,
|
803 |
+
"eval_macro_average_f1": 0.6125712401649462,
|
804 |
+
"eval_micro_average_f1": 0.747504159733777,
|
805 |
+
"eval_runtime": 7.2681,
|
806 |
+
"eval_samples_per_second": 330.759,
|
807 |
+
"eval_steps_per_second": 20.776,
|
808 |
+
"step": 4600
|
809 |
+
},
|
810 |
+
{
|
811 |
+
"epoch": 3.4763313609467454,
|
812 |
+
"eval_class_f1": {
|
813 |
+
"neg": 0.8018504240555128,
|
814 |
+
"neu": 0.7930382141505864,
|
815 |
+
"pos": 0.5692503176620076,
|
816 |
+
"q": 0.345679012345679
|
817 |
+
},
|
818 |
+
"eval_loss": 0.9825762510299683,
|
819 |
+
"eval_macro_average_f1": 0.6274544920534464,
|
820 |
+
"eval_micro_average_f1": 0.7512479201331115,
|
821 |
+
"eval_runtime": 7.2921,
|
822 |
+
"eval_samples_per_second": 329.672,
|
823 |
+
"eval_steps_per_second": 20.707,
|
824 |
+
"step": 4700
|
825 |
+
},
|
826 |
+
{
|
827 |
+
"epoch": 3.5502958579881656,
|
828 |
+
"eval_class_f1": {
|
829 |
+
"neg": 0.7946498819826908,
|
830 |
+
"neu": 0.7813455657492355,
|
831 |
+
"pos": 0.5795053003533569,
|
832 |
+
"q": 0.3055555555555555
|
833 |
+
},
|
834 |
+
"eval_loss": 0.9374552965164185,
|
835 |
+
"eval_macro_average_f1": 0.6152640759102097,
|
836 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
837 |
+
"eval_runtime": 7.387,
|
838 |
+
"eval_samples_per_second": 325.436,
|
839 |
+
"eval_steps_per_second": 20.441,
|
840 |
+
"step": 4800
|
841 |
+
},
|
842 |
+
{
|
843 |
+
"epoch": 3.6242603550295858,
|
844 |
+
"eval_class_f1": {
|
845 |
+
"neg": 0.7984790874524715,
|
846 |
+
"neu": 0.7785547785547785,
|
847 |
+
"pos": 0.5748218527315915,
|
848 |
+
"q": 0.3116883116883117
|
849 |
+
},
|
850 |
+
"eval_loss": 0.9656402468681335,
|
851 |
+
"eval_macro_average_f1": 0.6158860076067884,
|
852 |
+
"eval_micro_average_f1": 0.7408485856905158,
|
853 |
+
"eval_runtime": 7.4821,
|
854 |
+
"eval_samples_per_second": 321.299,
|
855 |
+
"eval_steps_per_second": 20.181,
|
856 |
+
"step": 4900
|
857 |
+
},
|
858 |
+
{
|
859 |
+
"epoch": 3.698224852071006,
|
860 |
+
"grad_norm": 0.6623280644416809,
|
861 |
+
"learning_rate": 1.2817808219178083e-05,
|
862 |
+
"loss": 0.2207,
|
863 |
+
"step": 5000
|
864 |
+
},
|
865 |
+
{
|
866 |
+
"epoch": 3.698224852071006,
|
867 |
+
"eval_class_f1": {
|
868 |
+
"neg": 0.7987616099071206,
|
869 |
+
"neu": 0.7862857142857143,
|
870 |
+
"pos": 0.5647348951911221,
|
871 |
+
"q": 0.3
|
872 |
+
},
|
873 |
+
"eval_loss": 0.9422620534896851,
|
874 |
+
"eval_macro_average_f1": 0.6124455548459892,
|
875 |
+
"eval_micro_average_f1": 0.7441763727121464,
|
876 |
+
"eval_runtime": 7.2765,
|
877 |
+
"eval_samples_per_second": 330.379,
|
878 |
+
"eval_steps_per_second": 20.752,
|
879 |
+
"step": 5000
|
880 |
+
},
|
881 |
+
{
|
882 |
+
"epoch": 3.772189349112426,
|
883 |
+
"eval_class_f1": {
|
884 |
+
"neg": 0.8,
|
885 |
+
"neu": 0.7671342685370742,
|
886 |
+
"pos": 0.5714285714285714,
|
887 |
+
"q": 0.3414634146341463
|
888 |
+
},
|
889 |
+
"eval_loss": 0.9625053405761719,
|
890 |
+
"eval_macro_average_f1": 0.620006563649948,
|
891 |
+
"eval_micro_average_f1": 0.7296173044925125,
|
892 |
+
"eval_runtime": 7.3445,
|
893 |
+
"eval_samples_per_second": 327.318,
|
894 |
+
"eval_steps_per_second": 20.559,
|
895 |
+
"step": 5100
|
896 |
+
},
|
897 |
+
{
|
898 |
+
"epoch": 3.8461538461538463,
|
899 |
+
"eval_class_f1": {
|
900 |
+
"neg": 0.8018942383583267,
|
901 |
+
"neu": 0.7968691762951919,
|
902 |
+
"pos": 0.5706874189364461,
|
903 |
+
"q": 0.25287356321839083
|
904 |
+
},
|
905 |
+
"eval_loss": 0.9822611212730408,
|
906 |
+
"eval_macro_average_f1": 0.6055810992020889,
|
907 |
+
"eval_micro_average_f1": 0.7520798668885191,
|
908 |
+
"eval_runtime": 7.4499,
|
909 |
+
"eval_samples_per_second": 322.688,
|
910 |
+
"eval_steps_per_second": 20.269,
|
911 |
+
"step": 5200
|
912 |
+
},
|
913 |
+
{
|
914 |
+
"epoch": 3.9201183431952664,
|
915 |
+
"eval_class_f1": {
|
916 |
+
"neg": 0.7891268533772653,
|
917 |
+
"neu": 0.7875375375375375,
|
918 |
+
"pos": 0.5810968494749125,
|
919 |
+
"q": 0.273972602739726
|
920 |
+
},
|
921 |
+
"eval_loss": 0.9442653656005859,
|
922 |
+
"eval_macro_average_f1": 0.6079334607823603,
|
923 |
+
"eval_micro_average_f1": 0.7433444259567388,
|
924 |
+
"eval_runtime": 7.3731,
|
925 |
+
"eval_samples_per_second": 326.052,
|
926 |
+
"eval_steps_per_second": 20.48,
|
927 |
+
"step": 5300
|
928 |
+
},
|
929 |
+
{
|
930 |
+
"epoch": 3.994082840236686,
|
931 |
+
"eval_class_f1": {
|
932 |
+
"neg": 0.8046511627906977,
|
933 |
+
"neu": 0.7945516458569808,
|
934 |
+
"pos": 0.5775,
|
935 |
+
"q": 0.32
|
936 |
+
},
|
937 |
+
"eval_loss": 0.9429491758346558,
|
938 |
+
"eval_macro_average_f1": 0.6241757021619195,
|
939 |
+
"eval_micro_average_f1": 0.7537437603993344,
|
940 |
+
"eval_runtime": 7.3966,
|
941 |
+
"eval_samples_per_second": 325.013,
|
942 |
+
"eval_steps_per_second": 20.415,
|
943 |
+
"step": 5400
|
944 |
+
},
|
945 |
+
{
|
946 |
+
"epoch": 4.068047337278107,
|
947 |
+
"grad_norm": 2.4124114513397217,
|
948 |
+
"learning_rate": 1.0763013698630138e-05,
|
949 |
+
"loss": 0.2077,
|
950 |
+
"step": 5500
|
951 |
+
},
|
952 |
+
{
|
953 |
+
"epoch": 4.068047337278107,
|
954 |
+
"eval_class_f1": {
|
955 |
+
"neg": 0.8063781321184511,
|
956 |
+
"neu": 0.7866927592954991,
|
957 |
+
"pos": 0.5862884160756501,
|
958 |
+
"q": 0.3333333333333333
|
959 |
+
},
|
960 |
+
"eval_loss": 1.1077452898025513,
|
961 |
+
"eval_macro_average_f1": 0.6281731602057334,
|
962 |
+
"eval_micro_average_f1": 0.7483361064891847,
|
963 |
+
"eval_runtime": 7.369,
|
964 |
+
"eval_samples_per_second": 326.23,
|
965 |
+
"eval_steps_per_second": 20.491,
|
966 |
+
"step": 5500
|
967 |
+
},
|
968 |
+
{
|
969 |
+
"epoch": 4.1420118343195265,
|
970 |
+
"eval_class_f1": {
|
971 |
+
"neg": 0.7993920972644377,
|
972 |
+
"neu": 0.7660256410256411,
|
973 |
+
"pos": 0.5726775956284154,
|
974 |
+
"q": 0.29629629629629634
|
975 |
+
},
|
976 |
+
"eval_loss": 1.1472598314285278,
|
977 |
+
"eval_macro_average_f1": 0.6085979075536977,
|
978 |
+
"eval_micro_average_f1": 0.7304492512479202,
|
979 |
+
"eval_runtime": 7.5033,
|
980 |
+
"eval_samples_per_second": 320.394,
|
981 |
+
"eval_steps_per_second": 20.125,
|
982 |
+
"step": 5600
|
983 |
+
},
|
984 |
+
{
|
985 |
+
"epoch": 4.215976331360947,
|
986 |
+
"eval_class_f1": {
|
987 |
+
"neg": 0.8024502297090352,
|
988 |
+
"neu": 0.7868978805394989,
|
989 |
+
"pos": 0.5731559854897219,
|
990 |
+
"q": 0.3
|
991 |
+
},
|
992 |
+
"eval_loss": 1.169406533241272,
|
993 |
+
"eval_macro_average_f1": 0.615626023934564,
|
994 |
+
"eval_micro_average_f1": 0.7462562396006656,
|
995 |
+
"eval_runtime": 7.4833,
|
996 |
+
"eval_samples_per_second": 321.249,
|
997 |
+
"eval_steps_per_second": 20.178,
|
998 |
+
"step": 5700
|
999 |
+
},
|
1000 |
+
{
|
1001 |
+
"epoch": 4.289940828402367,
|
1002 |
+
"eval_class_f1": {
|
1003 |
+
"neg": 0.803951367781155,
|
1004 |
+
"neu": 0.7902550437761706,
|
1005 |
+
"pos": 0.5685019206145967,
|
1006 |
+
"q": 0.30952380952380953
|
1007 |
+
},
|
1008 |
+
"eval_loss": 1.1968339681625366,
|
1009 |
+
"eval_macro_average_f1": 0.6180580354239329,
|
1010 |
+
"eval_micro_average_f1": 0.7495840266222962,
|
1011 |
+
"eval_runtime": 7.4382,
|
1012 |
+
"eval_samples_per_second": 323.197,
|
1013 |
+
"eval_steps_per_second": 20.301,
|
1014 |
+
"step": 5800
|
1015 |
+
},
|
1016 |
+
{
|
1017 |
+
"epoch": 4.363905325443787,
|
1018 |
+
"eval_class_f1": {
|
1019 |
+
"neg": 0.8024786986831913,
|
1020 |
+
"neu": 0.7916030534351146,
|
1021 |
+
"pos": 0.5773955773955775,
|
1022 |
+
"q": 0.3373493975903615
|
1023 |
+
},
|
1024 |
+
"eval_loss": 1.1896393299102783,
|
1025 |
+
"eval_macro_average_f1": 0.6272066817760612,
|
1026 |
+
"eval_micro_average_f1": 0.7504159733777038,
|
1027 |
+
"eval_runtime": 7.423,
|
1028 |
+
"eval_samples_per_second": 323.857,
|
1029 |
+
"eval_steps_per_second": 20.342,
|
1030 |
+
"step": 5900
|
1031 |
+
},
|
1032 |
+
{
|
1033 |
+
"epoch": 4.437869822485207,
|
1034 |
+
"grad_norm": 0.9506312608718872,
|
1035 |
+
"learning_rate": 8.708219178082192e-06,
|
1036 |
+
"loss": 0.1324,
|
1037 |
+
"step": 6000
|
1038 |
+
},
|
1039 |
+
{
|
1040 |
+
"epoch": 4.437869822485207,
|
1041 |
+
"eval_class_f1": {
|
1042 |
+
"neg": 0.8024316109422492,
|
1043 |
+
"neu": 0.7925840092699884,
|
1044 |
+
"pos": 0.5878048780487805,
|
1045 |
+
"q": 0.28915662650602414
|
1046 |
+
},
|
1047 |
+
"eval_loss": 1.2535008192062378,
|
1048 |
+
"eval_macro_average_f1": 0.6179942811917606,
|
1049 |
+
"eval_micro_average_f1": 0.7516638935108153,
|
1050 |
+
"eval_runtime": 7.3808,
|
1051 |
+
"eval_samples_per_second": 325.711,
|
1052 |
+
"eval_steps_per_second": 20.459,
|
1053 |
+
"step": 6000
|
1054 |
+
},
|
1055 |
+
{
|
1056 |
+
"epoch": 4.511834319526627,
|
1057 |
+
"eval_class_f1": {
|
1058 |
+
"neg": 0.7901821060965954,
|
1059 |
+
"neu": 0.7819374758780393,
|
1060 |
+
"pos": 0.5821064552661382,
|
1061 |
+
"q": 0.2535211267605634
|
1062 |
+
},
|
1063 |
+
"eval_loss": 1.2182434797286987,
|
1064 |
+
"eval_macro_average_f1": 0.601936791000334,
|
1065 |
+
"eval_micro_average_f1": 0.7396006655574043,
|
1066 |
+
"eval_runtime": 7.3498,
|
1067 |
+
"eval_samples_per_second": 327.085,
|
1068 |
+
"eval_steps_per_second": 20.545,
|
1069 |
+
"step": 6100
|
1070 |
+
},
|
1071 |
+
{
|
1072 |
+
"epoch": 4.585798816568047,
|
1073 |
+
"eval_class_f1": {
|
1074 |
+
"neg": 0.7945425361155697,
|
1075 |
+
"neu": 0.7956989247311828,
|
1076 |
+
"pos": 0.5761006289308177,
|
1077 |
+
"q": 0.22857142857142854
|
1078 |
+
},
|
1079 |
+
"eval_loss": 1.2836171388626099,
|
1080 |
+
"eval_macro_average_f1": 0.5987283795872497,
|
1081 |
+
"eval_micro_average_f1": 0.7508319467554077,
|
1082 |
+
"eval_runtime": 7.4332,
|
1083 |
+
"eval_samples_per_second": 323.412,
|
1084 |
+
"eval_steps_per_second": 20.314,
|
1085 |
+
"step": 6200
|
1086 |
+
},
|
1087 |
+
{
|
1088 |
+
"epoch": 4.659763313609467,
|
1089 |
+
"eval_class_f1": {
|
1090 |
+
"neg": 0.8003025718608169,
|
1091 |
+
"neu": 0.7733970529669454,
|
1092 |
+
"pos": 0.5691609977324262,
|
1093 |
+
"q": 0.3225806451612903
|
1094 |
+
},
|
1095 |
+
"eval_loss": 1.2842472791671753,
|
1096 |
+
"eval_macro_average_f1": 0.6163603169303697,
|
1097 |
+
"eval_micro_average_f1": 0.7346089850249584,
|
1098 |
+
"eval_runtime": 7.4941,
|
1099 |
+
"eval_samples_per_second": 320.786,
|
1100 |
+
"eval_steps_per_second": 20.149,
|
1101 |
+
"step": 6300
|
1102 |
+
},
|
1103 |
+
{
|
1104 |
+
"epoch": 4.733727810650888,
|
1105 |
+
"eval_class_f1": {
|
1106 |
+
"neg": 0.799375487900078,
|
1107 |
+
"neu": 0.7936865839909809,
|
1108 |
+
"pos": 0.5614489003880984,
|
1109 |
+
"q": 0.3225806451612903
|
1110 |
+
},
|
1111 |
+
"eval_loss": 1.3067219257354736,
|
1112 |
+
"eval_macro_average_f1": 0.619272904360112,
|
1113 |
+
"eval_micro_average_f1": 0.7487520798668885,
|
1114 |
+
"eval_runtime": 7.3805,
|
1115 |
+
"eval_samples_per_second": 325.723,
|
1116 |
+
"eval_steps_per_second": 20.459,
|
1117 |
+
"step": 6400
|
1118 |
+
},
|
1119 |
+
{
|
1120 |
+
"epoch": 4.8076923076923075,
|
1121 |
+
"grad_norm": 0.2732117772102356,
|
1122 |
+
"learning_rate": 6.653424657534246e-06,
|
1123 |
+
"loss": 0.1441,
|
1124 |
+
"step": 6500
|
1125 |
+
},
|
1126 |
+
{
|
1127 |
+
"epoch": 4.8076923076923075,
|
1128 |
+
"eval_class_f1": {
|
1129 |
+
"neg": 0.8027628549501151,
|
1130 |
+
"neu": 0.7751572327044026,
|
1131 |
+
"pos": 0.5694760820045559,
|
1132 |
+
"q": 0.3132530120481927
|
1133 |
+
},
|
1134 |
+
"eval_loss": 1.2718240022659302,
|
1135 |
+
"eval_macro_average_f1": 0.6151622954268166,
|
1136 |
+
"eval_micro_average_f1": 0.7371048252911814,
|
1137 |
+
"eval_runtime": 7.3822,
|
1138 |
+
"eval_samples_per_second": 325.649,
|
1139 |
+
"eval_steps_per_second": 20.455,
|
1140 |
+
"step": 6500
|
1141 |
+
},
|
1142 |
+
{
|
1143 |
+
"epoch": 4.881656804733728,
|
1144 |
+
"eval_class_f1": {
|
1145 |
+
"neg": 0.796875,
|
1146 |
+
"neu": 0.7868601986249045,
|
1147 |
+
"pos": 0.5735115431348725,
|
1148 |
+
"q": 0.29885057471264365
|
1149 |
+
},
|
1150 |
+
"eval_loss": 1.261472225189209,
|
1151 |
+
"eval_macro_average_f1": 0.6140243291181051,
|
1152 |
+
"eval_micro_average_f1": 0.7441763727121464,
|
1153 |
+
"eval_runtime": 7.5114,
|
1154 |
+
"eval_samples_per_second": 320.048,
|
1155 |
+
"eval_steps_per_second": 20.103,
|
1156 |
+
"step": 6600
|
1157 |
+
},
|
1158 |
+
{
|
1159 |
+
"epoch": 4.955621301775148,
|
1160 |
+
"eval_class_f1": {
|
1161 |
+
"neg": 0.7930763178599529,
|
1162 |
+
"neu": 0.7766536964980545,
|
1163 |
+
"pos": 0.5714285714285715,
|
1164 |
+
"q": 0.35294117647058826
|
1165 |
+
},
|
1166 |
+
"eval_loss": 1.2753080129623413,
|
1167 |
+
"eval_macro_average_f1": 0.6235249405642919,
|
1168 |
+
"eval_micro_average_f1": 0.7358569051580699,
|
1169 |
+
"eval_runtime": 7.3656,
|
1170 |
+
"eval_samples_per_second": 326.381,
|
1171 |
+
"eval_steps_per_second": 20.501,
|
1172 |
+
"step": 6700
|
1173 |
+
},
|
1174 |
+
{
|
1175 |
+
"epoch": 5.029585798816568,
|
1176 |
+
"eval_class_f1": {
|
1177 |
+
"neg": 0.7962962962962963,
|
1178 |
+
"neu": 0.7754943776657619,
|
1179 |
+
"pos": 0.5657276995305165,
|
1180 |
+
"q": 0.345679012345679
|
1181 |
+
},
|
1182 |
+
"eval_loss": 1.3079357147216797,
|
1183 |
+
"eval_macro_average_f1": 0.6207993464595634,
|
1184 |
+
"eval_micro_average_f1": 0.7366888519134775,
|
1185 |
+
"eval_runtime": 7.4026,
|
1186 |
+
"eval_samples_per_second": 324.753,
|
1187 |
+
"eval_steps_per_second": 20.398,
|
1188 |
+
"step": 6800
|
1189 |
+
},
|
1190 |
+
{
|
1191 |
+
"epoch": 5.103550295857988,
|
1192 |
+
"eval_class_f1": {
|
1193 |
+
"neg": 0.7972136222910216,
|
1194 |
+
"neu": 0.7786790266512167,
|
1195 |
+
"pos": 0.5721040189125295,
|
1196 |
+
"q": 0.3703703703703704
|
1197 |
+
},
|
1198 |
+
"eval_loss": 1.3499900102615356,
|
1199 |
+
"eval_macro_average_f1": 0.6295917595562845,
|
1200 |
+
"eval_micro_average_f1": 0.740432612312812,
|
1201 |
+
"eval_runtime": 7.3777,
|
1202 |
+
"eval_samples_per_second": 325.846,
|
1203 |
+
"eval_steps_per_second": 20.467,
|
1204 |
+
"step": 6900
|
1205 |
+
},
|
1206 |
+
{
|
1207 |
+
"epoch": 5.177514792899408,
|
1208 |
+
"grad_norm": 11.024497985839844,
|
1209 |
+
"learning_rate": 4.598630136986302e-06,
|
1210 |
+
"loss": 0.1111,
|
1211 |
+
"step": 7000
|
1212 |
+
},
|
1213 |
+
{
|
1214 |
+
"epoch": 5.177514792899408,
|
1215 |
+
"eval_class_f1": {
|
1216 |
+
"neg": 0.7956147220046985,
|
1217 |
+
"neu": 0.7807853602744949,
|
1218 |
+
"pos": 0.5693606755126658,
|
1219 |
+
"q": 0.3544303797468354
|
1220 |
+
},
|
1221 |
+
"eval_loss": 1.4051584005355835,
|
1222 |
+
"eval_macro_average_f1": 0.6250477843846737,
|
1223 |
+
"eval_micro_average_f1": 0.7412645590682196,
|
1224 |
+
"eval_runtime": 7.2531,
|
1225 |
+
"eval_samples_per_second": 331.444,
|
1226 |
+
"eval_steps_per_second": 20.819,
|
1227 |
+
"step": 7000
|
1228 |
+
},
|
1229 |
+
{
|
1230 |
+
"epoch": 5.2514792899408285,
|
1231 |
+
"eval_class_f1": {
|
1232 |
+
"neg": 0.7925407925407926,
|
1233 |
+
"neu": 0.7769230769230769,
|
1234 |
+
"pos": 0.5737898465171192,
|
1235 |
+
"q": 0.27027027027027023
|
1236 |
+
},
|
1237 |
+
"eval_loss": 1.4020917415618896,
|
1238 |
+
"eval_macro_average_f1": 0.6033809965628147,
|
1239 |
+
"eval_micro_average_f1": 0.7375207986688852,
|
1240 |
+
"eval_runtime": 7.404,
|
1241 |
+
"eval_samples_per_second": 324.69,
|
1242 |
+
"eval_steps_per_second": 20.394,
|
1243 |
+
"step": 7100
|
1244 |
+
},
|
1245 |
+
{
|
1246 |
+
"epoch": 5.325443786982248,
|
1247 |
+
"eval_class_f1": {
|
1248 |
+
"neg": 0.7949326999208235,
|
1249 |
+
"neu": 0.7753846153846154,
|
1250 |
+
"pos": 0.5727482678983833,
|
1251 |
+
"q": 0.27848101265822783
|
1252 |
+
},
|
1253 |
+
"eval_loss": 1.4238033294677734,
|
1254 |
+
"eval_macro_average_f1": 0.6053866489655125,
|
1255 |
+
"eval_micro_average_f1": 0.7358569051580699,
|
1256 |
+
"eval_runtime": 7.4328,
|
1257 |
+
"eval_samples_per_second": 323.433,
|
1258 |
+
"eval_steps_per_second": 20.315,
|
1259 |
+
"step": 7200
|
1260 |
+
},
|
1261 |
+
{
|
1262 |
+
"epoch": 5.399408284023669,
|
1263 |
+
"eval_class_f1": {
|
1264 |
+
"neg": 0.7969348659003831,
|
1265 |
+
"neu": 0.7798306389530408,
|
1266 |
+
"pos": 0.5721212121212121,
|
1267 |
+
"q": 0.3
|
1268 |
+
},
|
1269 |
+
"eval_loss": 1.4431192874908447,
|
1270 |
+
"eval_macro_average_f1": 0.612221679243659,
|
1271 |
+
"eval_micro_average_f1": 0.7408485856905158,
|
1272 |
+
"eval_runtime": 7.3682,
|
1273 |
+
"eval_samples_per_second": 326.266,
|
1274 |
+
"eval_steps_per_second": 20.493,
|
1275 |
+
"step": 7300
|
1276 |
+
},
|
1277 |
+
{
|
1278 |
+
"epoch": 5.4733727810650885,
|
1279 |
+
"eval_class_f1": {
|
1280 |
+
"neg": 0.7940717628705148,
|
1281 |
+
"neu": 0.783072817384674,
|
1282 |
+
"pos": 0.5703883495145632,
|
1283 |
+
"q": 0.3037974683544304
|
1284 |
+
},
|
1285 |
+
"eval_loss": 1.4316595792770386,
|
1286 |
+
"eval_macro_average_f1": 0.6128325995310456,
|
1287 |
+
"eval_micro_average_f1": 0.7416805324459235,
|
1288 |
+
"eval_runtime": 7.3736,
|
1289 |
+
"eval_samples_per_second": 326.03,
|
1290 |
+
"eval_steps_per_second": 20.479,
|
1291 |
+
"step": 7400
|
1292 |
+
},
|
1293 |
+
{
|
1294 |
+
"epoch": 5.547337278106509,
|
1295 |
+
"grad_norm": 0.4265735149383545,
|
1296 |
+
"learning_rate": 2.543835616438356e-06,
|
1297 |
+
"loss": 0.0933,
|
1298 |
+
"step": 7500
|
1299 |
+
},
|
1300 |
+
{
|
1301 |
+
"epoch": 5.547337278106509,
|
1302 |
+
"eval_class_f1": {
|
1303 |
+
"neg": 0.7978311386522074,
|
1304 |
+
"neu": 0.7788089713843775,
|
1305 |
+
"pos": 0.567409144196952,
|
1306 |
+
"q": 0.30769230769230765
|
1307 |
+
},
|
1308 |
+
"eval_loss": 1.4399964809417725,
|
1309 |
+
"eval_macro_average_f1": 0.6129353904814612,
|
1310 |
+
"eval_micro_average_f1": 0.7387687188019967,
|
1311 |
+
"eval_runtime": 7.2697,
|
1312 |
+
"eval_samples_per_second": 330.689,
|
1313 |
+
"eval_steps_per_second": 20.771,
|
1314 |
+
"step": 7500
|
1315 |
+
},
|
1316 |
+
{
|
1317 |
+
"epoch": 5.621301775147929,
|
1318 |
+
"eval_class_f1": {
|
1319 |
+
"neg": 0.7984375,
|
1320 |
+
"neu": 0.7812379853902346,
|
1321 |
+
"pos": 0.5714285714285714,
|
1322 |
+
"q": 0.32500000000000007
|
1323 |
+
},
|
1324 |
+
"eval_loss": 1.4240373373031616,
|
1325 |
+
"eval_macro_average_f1": 0.6190260142047015,
|
1326 |
+
"eval_micro_average_f1": 0.7412645590682196,
|
1327 |
+
"eval_runtime": 7.4341,
|
1328 |
+
"eval_samples_per_second": 323.375,
|
1329 |
+
"eval_steps_per_second": 20.312,
|
1330 |
+
"step": 7600
|
1331 |
+
},
|
1332 |
+
{
|
1333 |
+
"epoch": 5.695266272189349,
|
1334 |
+
"eval_class_f1": {
|
1335 |
+
"neg": 0.7987470634299139,
|
1336 |
+
"neu": 0.7843286420692278,
|
1337 |
+
"pos": 0.5703883495145632,
|
1338 |
+
"q": 0.30769230769230765
|
1339 |
+
},
|
1340 |
+
"eval_loss": 1.4332064390182495,
|
1341 |
+
"eval_macro_average_f1": 0.6152890906765031,
|
1342 |
+
"eval_micro_average_f1": 0.7437603993344426,
|
1343 |
+
"eval_runtime": 7.4434,
|
1344 |
+
"eval_samples_per_second": 322.969,
|
1345 |
+
"eval_steps_per_second": 20.286,
|
1346 |
+
"step": 7700
|
1347 |
+
},
|
1348 |
+
{
|
1349 |
+
"epoch": 5.769230769230769,
|
1350 |
+
"eval_class_f1": {
|
1351 |
+
"neg": 0.7981220657276996,
|
1352 |
+
"neu": 0.781874039938556,
|
1353 |
+
"pos": 0.5731132075471698,
|
1354 |
+
"q": 0.30769230769230765
|
1355 |
+
},
|
1356 |
+
"eval_loss": 1.4344979524612427,
|
1357 |
+
"eval_macro_average_f1": 0.6152004052264332,
|
1358 |
+
"eval_micro_average_f1": 0.7416805324459235,
|
1359 |
+
"eval_runtime": 7.3808,
|
1360 |
+
"eval_samples_per_second": 325.708,
|
1361 |
+
"eval_steps_per_second": 20.458,
|
1362 |
+
"step": 7800
|
1363 |
+
},
|
1364 |
+
{
|
1365 |
+
"epoch": 5.84319526627219,
|
1366 |
+
"eval_class_f1": {
|
1367 |
+
"neg": 0.7990654205607477,
|
1368 |
+
"neu": 0.7815384615384616,
|
1369 |
+
"pos": 0.5724465558194775,
|
1370 |
+
"q": 0.3414634146341463
|
1371 |
+
},
|
1372 |
+
"eval_loss": 1.4412455558776855,
|
1373 |
+
"eval_macro_average_f1": 0.6236284631382082,
|
1374 |
+
"eval_micro_average_f1": 0.7420965058236273,
|
1375 |
+
"eval_runtime": 7.3915,
|
1376 |
+
"eval_samples_per_second": 325.237,
|
1377 |
+
"eval_steps_per_second": 20.429,
|
1378 |
+
"step": 7900
|
1379 |
+
},
|
1380 |
+
{
|
1381 |
+
"epoch": 5.9171597633136095,
|
1382 |
+
"grad_norm": 16.41318702697754,
|
1383 |
+
"learning_rate": 4.89041095890411e-07,
|
1384 |
+
"loss": 0.1006,
|
1385 |
+
"step": 8000
|
1386 |
+
},
|
1387 |
+
{
|
1388 |
+
"epoch": 5.9171597633136095,
|
1389 |
+
"eval_class_f1": {
|
1390 |
+
"neg": 0.7987519500780033,
|
1391 |
+
"neu": 0.7813098429720413,
|
1392 |
+
"pos": 0.5700598802395208,
|
1393 |
+
"q": 0.32500000000000007
|
1394 |
+
},
|
1395 |
+
"eval_loss": 1.4469937086105347,
|
1396 |
+
"eval_macro_average_f1": 0.6187804183223914,
|
1397 |
+
"eval_micro_average_f1": 0.7416805324459235,
|
1398 |
+
"eval_runtime": 7.3689,
|
1399 |
+
"eval_samples_per_second": 326.236,
|
1400 |
+
"eval_steps_per_second": 20.492,
|
1401 |
+
"step": 8000
|
1402 |
+
},
|
1403 |
+
{
|
1404 |
+
"epoch": 5.991124260355029,
|
1405 |
+
"eval_class_f1": {
|
1406 |
+
"neg": 0.7990654205607477,
|
1407 |
+
"neu": 0.781441717791411,
|
1408 |
+
"pos": 0.569377990430622,
|
1409 |
+
"q": 0.32500000000000007
|
1410 |
+
},
|
1411 |
+
"eval_loss": 1.4454258680343628,
|
1412 |
+
"eval_macro_average_f1": 0.6187212821956952,
|
1413 |
+
"eval_micro_average_f1": 0.7416805324459235,
|
1414 |
+
"eval_runtime": 7.448,
|
1415 |
+
"eval_samples_per_second": 322.77,
|
1416 |
+
"eval_steps_per_second": 20.274,
|
1417 |
+
"step": 8100
|
1418 |
+
}
|
1419 |
+
],
|
1420 |
+
"logging_steps": 500,
|
1421 |
+
"max_steps": 8112,
|
1422 |
+
"num_input_tokens_seen": 0,
|
1423 |
+
"num_train_epochs": 6,
|
1424 |
+
"save_steps": 100,
|
1425 |
+
"total_flos": 1.0485727069042368e+16,
|
1426 |
+
"train_batch_size": 16,
|
1427 |
+
"trial_name": null,
|
1428 |
+
"trial_params": null
|
1429 |
+
}
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d136bd3a72331c1ee1902b7c4073c9b6bb6f902bd3ce56bb0ff2ee4eab86fe5c
|
3 |
+
size 5048
|