lenkaB committed on
Commit
170c9f3
·
verified ·
1 Parent(s): 9f085cc

Initial commit

Browse files
Files changed (4) hide show
  1. config.json +610 -0
  2. model_args.json +1 -0
  3. special_tokens_map.json +7 -0
  4. vocab.txt +0 -0
config.json ADDED
@@ -0,0 +1,610 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "classla/bcms-bertic",
3
+ "architectures": [
4
+ "ElectraForTokenClassification"
5
+ ],
6
+ "attention_probs_dropout_prob": 0.1,
7
+ "classifier_dropout": null,
8
+ "embedding_size": 768,
9
+ "hidden_act": "gelu",
10
+ "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
+ "id2label": {
13
+ "0": "[0, '', 0, '']",
14
+ "1": "[1, 'evar', 0, '']",
15
+ "2": "[1, 'a', 0, '']",
16
+ "3": "[1, '', 0, '']",
17
+ "4": "[1, 'oj', 0, '']",
18
+ "5": "[2, '', 0, '']",
19
+ "6": "[6, 'ao', 0, '']",
20
+ "7": "[3, 'ivati', 0, '']",
21
+ "8": "[0, 'ti', 0, '']",
22
+ "9": "[0, 'be', 0, '']",
23
+ "10": "[2, 'an', 0, '']",
24
+ "11": "[4, 'ao', 0, '']",
25
+ "12": "[1, 'i', 0, '']",
26
+ "13": "[2, 'biti', 0, '']",
27
+ "14": "[2, 'iti', 0, '']",
28
+ "15": "[2, 'aj', 0, '']",
29
+ "16": "[1, 'e', 0, '']",
30
+ "17": "[3, 'an', 0, '']",
31
+ "18": "[2, 'ar', 0, '']",
32
+ "19": "[2, 'ji', 0, '']",
33
+ "20": "[1, 'eti', 0, '']",
34
+ "21": "[2, 'ti', 0, '']",
35
+ "22": "[3, 'lac', 0, '']",
36
+ "23": "[1, 'iti', 0, '']",
37
+ "24": "[1, 'o', 0, '']",
38
+ "25": "[2, 'av', 0, '']",
39
+ "26": "[3, 'iti', 0, '']",
40
+ "27": "[4, 'ti', 0, '']",
41
+ "28": "[4, 'iti', 0, '']",
42
+ "29": "[1, 'ji', 0, '']",
43
+ "30": "[3, '', 0, '']",
44
+ "31": "[2, 'ak', 0, '']",
45
+ "32": "[6, 'ak', 0, '']",
46
+ "33": "[5, '', 0, '']",
47
+ "34": "[3, 'o', 0, '']",
48
+ "35": "[3, 'aj', 0, '']",
49
+ "36": "[0, 'ti', 1, 'ht']",
50
+ "37": "[2, 'ti', 1, 'ht']",
51
+ "38": "[3, 'ak', 0, '']",
52
+ "39": "[2, 'i', 0, '']",
53
+ "40": "[3, 'ovati', 0, '']",
54
+ "41": "[1, 'sti', 0, '']",
55
+ "42": "[3, 'ka', 0, '']",
56
+ "43": "[2, 'a', 0, '']",
57
+ "44": "[4, 'li', 0, '']",
58
+ "45": "[2, '\u0107i', 0, '']",
59
+ "46": "[3, 'av', 0, '']",
60
+ "47": "[4, 'k', 0, '']",
61
+ "48": "[0, 'j', 0, '']",
62
+ "49": "[3, 'e', 0, '']",
63
+ "50": "[4, 'li', 3, '']",
64
+ "51": "[3, '', 0, 'o']",
65
+ "52": "[1, 'aj', 0, '']",
66
+ "53": "[2, 'eti', 0, '']",
67
+ "54": "[3, 'lik', 3, '']",
68
+ "55": "[2, 'zati', 0, '']",
69
+ "56": "[4, 'biti', 0, '']",
70
+ "57": "[1, '', 0, 'on']",
71
+ "58": "[1, 'ti', 0, '']",
72
+ "59": "[2, 'ti', 1, 'b']",
73
+ "60": "[5, 'iti', 0, '']",
74
+ "61": "[1, 'j', 0, '']",
75
+ "62": "[2, 'ka', 0, '']",
76
+ "63": "[5, 'ivati', 0, '']",
77
+ "64": "[4, 'ak', 0, '']",
78
+ "65": "[2, 'vati', 0, '']",
79
+ "66": "[3, 'st', 0, '']",
80
+ "67": "[3, 'ek', 0, '']",
81
+ "68": "[0, 'a', 0, '']",
82
+ "69": "[2, 'at', 0, '']",
83
+ "70": "[2, 'ac', 0, '']",
84
+ "71": "[1, 'ica', 0, '']",
85
+ "72": "[1, 'i\u010dan', 0, '']",
86
+ "73": "[1, 'arta', 0, '']",
87
+ "74": "[2, 'k', 0, '']",
88
+ "75": "[2, 'o', 0, '']",
89
+ "76": "[1, 'astarski', 0, '']",
90
+ "77": "[1, 'ojan', 0, '']",
91
+ "78": "[4, 'o', 0, '']",
92
+ "79": "[4, 'ac', 0, '']",
93
+ "80": "[3, 'o', 3, '']",
94
+ "81": "[3, 'vo', 0, '']",
95
+ "82": "[2, 'cati', 0, '']",
96
+ "83": "[1, 'ati', 0, '']",
97
+ "84": "[2, 'tak', 0, '']",
98
+ "85": "[2, 'on', 0, '']",
99
+ "86": "[2, 'ao', 0, '']",
100
+ "87": "[3, '\u0107i', 0, '']",
101
+ "88": "[3, 'ac', 0, '']",
102
+ "89": "[3, 'ar', 0, '']",
103
+ "90": "[4, '', 0, 'o']",
104
+ "91": "[3, 't', 0, '']",
105
+ "92": "[2, 'sok', 0, '']",
106
+ "93": "[2, '.o.o.', 0, '']",
107
+ "94": "[3, 'lo', 0, '']",
108
+ "95": "[3, 'ati', 0, '']",
109
+ "96": "[3, 'biti', 0, '']",
110
+ "97": "[2, 'g', 0, '']",
111
+ "98": "[5, 'an', 3, '']",
112
+ "99": "[1, 'istarski', 0, '']",
113
+ "100": "[3, 'tan', 0, '']",
114
+ "101": "[0, '', 3, '\u0161t']",
115
+ "102": "[4, '\u0161ta', 0, '']",
116
+ "103": "[2, 'sti', 0, '']",
117
+ "104": "[5, '\u0107i', 0, '']",
118
+ "105": "[1, 'an', 0, '']",
119
+ "106": "[2, 'lik', 0, '']",
120
+ "107": "[1, 'rokuplje', 0, '']",
121
+ "108": "[1, 'epublika', 0, '']",
122
+ "109": "[3, 'sti', 0, '']",
123
+ "110": "[2, 'eko', 0, '']",
124
+ "111": "[4, 'diti', 0, '']",
125
+ "112": "[2, 'ga', 0, '']",
126
+ "113": "[4, 'lac', 0, '']",
127
+ "114": "[0, 'i', 0, '']",
128
+ "115": "[4, 'av', 0, '']",
129
+ "116": "[2, 'zu', 0, '']",
130
+ "117": "[3, 'zak', 0, '']",
131
+ "118": "[4, 'titi', 0, '']",
132
+ "119": "[3, 'tak', 3, '']",
133
+ "120": "[4, 'vati', 0, '']",
134
+ "121": "[3, 'lo', 3, '']",
135
+ "122": "[3, 'ba', 0, '']",
136
+ "123": "[1, 'u\u017eben', 0, '']",
137
+ "124": "[1, 'ugi', 0, '']",
138
+ "125": "[4, 'at', 0, '']",
139
+ "126": "[3, 'titi', 0, '']",
140
+ "127": "[2, 'sati', 0, '']",
141
+ "128": "[4, 'deti', 0, '']",
142
+ "129": "[5, 'stiti', 0, '']",
143
+ "130": "[6, 'ogo', 0, 'm']",
144
+ "131": "[3, 'deti', 0, '']",
145
+ "132": "[4, 'an', 0, '']",
146
+ "133": "[2, 'tan', 0, '']",
147
+ "134": "[2, 'gati', 0, '']",
148
+ "135": "[1, 'i', 3, 'bi']",
149
+ "136": "[4, 'mnogo', 0, '']",
150
+ "137": "[4, 'g', 0, '']",
151
+ "138": "[4, '', 0, '']",
152
+ "139": "[2, 'ek', 0, '']",
153
+ "140": "[2, 'nuti', 0, '']",
154
+ "141": "[5, 'lac', 0, '']",
155
+ "142": "[1, 'uti', 0, '']",
156
+ "143": "[3, 'li', 0, '']",
157
+ "144": "[1, 'i\u010dno', 0, '']",
158
+ "145": "[6, 'r', 1, 'dob']",
159
+ "146": "[4, '\u0161to', 0, '']",
160
+ "147": "[2, 'zak', 0, '']",
161
+ "148": "[1, 'r\u0161ilac', 0, '']",
162
+ "149": "[1, 'u\u017enost', 0, '']",
163
+ "150": "[1, 'av', 0, '']",
164
+ "151": "[4, 'am', 0, '']",
165
+ "152": "[3, 'diti', 0, '']",
166
+ "153": "[4, 'tak', 0, '']",
167
+ "154": "[2, 'ha', 0, '']",
168
+ "155": "[5, 'slati', 0, '']",
169
+ "156": "[3, 'lik', 0, '']",
170
+ "157": "[1, 'ok', 0, '']",
171
+ "158": "[4, 'i', 0, 'o']",
172
+ "159": "[2, 'go', 0, '']",
173
+ "160": "[2, 'te', 0, '']",
174
+ "161": "[1, 'ak', 0, '']",
175
+ "162": "[4, 'ziti', 0, '']",
176
+ "163": "[2, 'tac', 0, '']",
177
+ "164": "[4, 'an', 3, '']",
178
+ "165": "[2, 'tko', 0, '']",
179
+ "166": "[2, 'zak', 3, '']",
180
+ "167": "[3, 'sok', 0, '']",
181
+ "168": "[3, 'ao', 0, '']",
182
+ "169": "[3, 'g', 0, '']",
183
+ "170": "[2, 'sko', 0, '']",
184
+ "171": "[2, 'tati', 0, '']",
185
+ "172": "[4, '\u0107i', 0, '']",
186
+ "173": "[1, 'a\u010dka', 0, '']",
187
+ "174": "[2, 'eko', 3, '']",
188
+ "175": "[3, 'i', 0, 'o']",
189
+ "176": "[6, 'o', 0, '']",
190
+ "177": "[2, 'go', 3, '']",
191
+ "178": "[3, 'avati', 0, '']",
192
+ "179": "[2, '', 0, 'o']",
193
+ "180": "[0, '', 3, 'sl']",
194
+ "181": "[3, 'siti', 0, '']",
195
+ "182": "[1, 'agistar', 0, '']",
196
+ "183": "[6, 'iti', 0, '']",
197
+ "184": "[4, 'citi', 0, '']",
198
+ "185": "[3, 'vojeru\u010dno', 0, '']",
199
+ "186": "[4, 'gati', 0, '']",
200
+ "187": "[4, 'stiti', 0, '']",
201
+ "188": "[3, 'ti', 0, '']",
202
+ "189": "[1, 'odina', 0, '']",
203
+ "190": "[1, '\u0161i', 0, '']",
204
+ "191": "[1, 'u\u0107an', 0, '']",
205
+ "192": "[1, 'resuda', 0, '']",
206
+ "193": "[1, 'p\u0161tinski', 0, '']",
207
+ "194": "[1, 'ud', 0, '']",
208
+ "195": "[2, 'an', 5, 'prav']",
209
+ "196": "[1, 'okat', 0, '']",
210
+ "197": "[1, 'en', 0, '']",
211
+ "198": "[1, 'oletan', 0, '']",
212
+ "199": "[3, 'ta', 0, '']",
213
+ "200": "[3, 'tac', 0, '']",
214
+ "201": "[2, 'am', 0, '']",
215
+ "202": "[1, 'izija', 0, '']",
216
+ "203": "[4, 'lja', 0, '']",
217
+ "204": "[1, 'omo\u0107nik', 0, '']",
218
+ "205": "[2, 'j', 0, '']",
219
+ "206": "[7, 'r', 1, 'dob']",
220
+ "207": "[1, 'ka', 0, '']",
221
+ "208": "[3, 'i', 0, '']",
222
+ "209": "[5, 'ki', 0, '']",
223
+ "210": "[1, 'n', 1, '']",
224
+ "211": "[1, 'ar\u0161al', 0, '']",
225
+ "212": "[5, 'k', 1, 'uz']",
226
+ "213": "[1, 'ovodan', 0, '']",
227
+ "214": "[4, 'sti', 0, '']",
228
+ "215": "[5, 'ovati', 0, '']",
229
+ "216": "[3, 'ajati', 0, '']",
230
+ "217": "[3, 'zac', 0, '']",
231
+ "218": "[0, 'k', 0, '']",
232
+ "219": "[2, '', 5, 'isposa']",
233
+ "220": "[3, 'a', 0, '']",
234
+ "221": "[4, 'tati', 0, '']",
235
+ "222": "[1, 'ana', 0, '']",
236
+ "223": "[2, 'hteti', 0, '']",
237
+ "224": "[3, 'teti', 0, '']",
238
+ "225": "[1, 'esor', 0, '']",
239
+ "226": "[3, 'li', 3, '']",
240
+ "227": "[0, 'ar', 0, '']",
241
+ "228": "[2, 'oktor', 0, '']",
242
+ "229": "[5, '\u010dovek', 0, '']",
243
+ "230": "[3, 'ti', 1, 'b']",
244
+ "231": "[3, 'anj', 0, '']",
245
+ "232": "[4, 'siti', 0, '']",
246
+ "233": "[2, 'lik', 3, '']",
247
+ "234": "[1, 'uriranje', 0, '']",
248
+ "235": "[5, 'sti', 0, '']",
249
+ "236": "[3, 'dak', 0, '']",
250
+ "237": "[6, 'stiti', 0, '']",
251
+ "238": "[0, 'ugi', 0, '']",
252
+ "239": "[4, 'ar', 0, 'do']",
253
+ "240": "[4, 'ivati', 0, '']",
254
+ "241": "[2, 'ja', 0, '']",
255
+ "242": "[1, 'amostalno', 0, '']",
256
+ "243": "[1, 'reduze\u0107e', 0, '']",
257
+ "244": "[2, 'zo', 0, '']",
258
+ "245": "[2, 'n', 0, '']",
259
+ "246": "[5, 'an', 0, '']",
260
+ "247": "[1, 'in', 0, '']",
261
+ "248": "[3, 'at', 0, '']",
262
+ "249": "[3, 'slan', 0, '']",
263
+ "250": "[6, 'ar', 0, '']",
264
+ "251": "[5, 'ent', 0, '']",
265
+ "252": "[3, 'ti', 1, 'sl']",
266
+ "253": "[3, 'tak', 0, '']",
267
+ "254": "[1, 'osno', 0, '']",
268
+ "255": "[4, 'cati', 0, '']",
269
+ "256": "[3, 'mo', 0, '']",
270
+ "257": "[1, 'efon', 0, '']",
271
+ "258": "[1, 'ugo', 0, '']",
272
+ "259": "[1, 'omiran', 0, '']",
273
+ "260": "[4, 'zak', 0, '']",
274
+ "261": "[1, 'ina', 0, '']",
275
+ "262": "[3, 'vati', 0, '']",
276
+ "263": "[1, 'oktor', 0, '']",
277
+ "264": "[0, 'an', 0, '']",
278
+ "265": "[0, 'ica', 0, '']",
279
+ "266": "[3, '\u0161ta', 0, '']",
280
+ "267": "[4, 'ti', 2, '']",
281
+ "268": "[2, 'ti', 4, 'inten']",
282
+ "269": "[0, '.', 0, '']",
283
+ "270": "[2, 'sok', 3, '']",
284
+ "271": "[0, 'botica', 0, '']",
285
+ "272": "[1, 'ektri\u010dan', 0, '']",
286
+ "273": "[1, 'i', 2, '']",
287
+ "274": "[0, 'ti', 1, 'b']",
288
+ "275": "[1, 'ena', 0, '']",
289
+ "276": "[2, '', 5, 'vodod']",
290
+ "277": "[5, 'zniti', 0, '']",
291
+ "278": "[0, 'n', 0, '']",
292
+ "279": "[2, 'g', 3, '']",
293
+ "280": "[4, 'len', 0, '']",
294
+ "281": "[1, 'omo\u0107je', 0, '']",
295
+ "282": "[2, 'ati', 0, '']",
296
+ "283": "[1, 'ospodin', 0, '']",
297
+ "284": "[2, '', 11, 'prvotu\u017een']",
298
+ "285": "[4, 'dak', 0, '']",
299
+ "286": "[3, 'len', 0, '']",
300
+ "287": "[2, 'ok', 0, '']"
301
+ },
302
+ "initializer_range": 0.02,
303
+ "intermediate_size": 3072,
304
+ "label2id": {
305
+ "[0, '', 0, '']": 0,
306
+ "[0, '', 3, 'sl']": 180,
307
+ "[0, '', 3, '\u0161t']": 101,
308
+ "[0, '.', 0, '']": 269,
309
+ "[0, 'a', 0, '']": 68,
310
+ "[0, 'an', 0, '']": 264,
311
+ "[0, 'ar', 0, '']": 227,
312
+ "[0, 'be', 0, '']": 9,
313
+ "[0, 'botica', 0, '']": 271,
314
+ "[0, 'i', 0, '']": 114,
315
+ "[0, 'ica', 0, '']": 265,
316
+ "[0, 'j', 0, '']": 48,
317
+ "[0, 'k', 0, '']": 218,
318
+ "[0, 'n', 0, '']": 278,
319
+ "[0, 'ti', 0, '']": 8,
320
+ "[0, 'ti', 1, 'b']": 274,
321
+ "[0, 'ti', 1, 'ht']": 36,
322
+ "[0, 'ugi', 0, '']": 238,
323
+ "[1, '', 0, '']": 3,
324
+ "[1, '', 0, 'on']": 57,
325
+ "[1, 'a', 0, '']": 2,
326
+ "[1, 'agistar', 0, '']": 182,
327
+ "[1, 'aj', 0, '']": 52,
328
+ "[1, 'ak', 0, '']": 161,
329
+ "[1, 'amostalno', 0, '']": 242,
330
+ "[1, 'an', 0, '']": 105,
331
+ "[1, 'ana', 0, '']": 222,
332
+ "[1, 'arta', 0, '']": 73,
333
+ "[1, 'ar\u0161al', 0, '']": 211,
334
+ "[1, 'astarski', 0, '']": 76,
335
+ "[1, 'ati', 0, '']": 83,
336
+ "[1, 'av', 0, '']": 150,
337
+ "[1, 'a\u010dka', 0, '']": 173,
338
+ "[1, 'e', 0, '']": 16,
339
+ "[1, 'efon', 0, '']": 257,
340
+ "[1, 'ektri\u010dan', 0, '']": 272,
341
+ "[1, 'en', 0, '']": 197,
342
+ "[1, 'ena', 0, '']": 275,
343
+ "[1, 'epublika', 0, '']": 108,
344
+ "[1, 'esor', 0, '']": 225,
345
+ "[1, 'eti', 0, '']": 20,
346
+ "[1, 'evar', 0, '']": 1,
347
+ "[1, 'i', 0, '']": 12,
348
+ "[1, 'i', 2, '']": 273,
349
+ "[1, 'i', 3, 'bi']": 135,
350
+ "[1, 'ica', 0, '']": 71,
351
+ "[1, 'in', 0, '']": 247,
352
+ "[1, 'ina', 0, '']": 261,
353
+ "[1, 'istarski', 0, '']": 99,
354
+ "[1, 'iti', 0, '']": 23,
355
+ "[1, 'izija', 0, '']": 202,
356
+ "[1, 'i\u010dan', 0, '']": 72,
357
+ "[1, 'i\u010dno', 0, '']": 144,
358
+ "[1, 'j', 0, '']": 61,
359
+ "[1, 'ji', 0, '']": 29,
360
+ "[1, 'ka', 0, '']": 207,
361
+ "[1, 'n', 1, '']": 210,
362
+ "[1, 'o', 0, '']": 24,
363
+ "[1, 'odina', 0, '']": 189,
364
+ "[1, 'oj', 0, '']": 4,
365
+ "[1, 'ojan', 0, '']": 77,
366
+ "[1, 'ok', 0, '']": 157,
367
+ "[1, 'okat', 0, '']": 196,
368
+ "[1, 'oktor', 0, '']": 263,
369
+ "[1, 'oletan', 0, '']": 198,
370
+ "[1, 'omiran', 0, '']": 259,
371
+ "[1, 'omo\u0107je', 0, '']": 281,
372
+ "[1, 'omo\u0107nik', 0, '']": 204,
373
+ "[1, 'osno', 0, '']": 254,
374
+ "[1, 'ospodin', 0, '']": 283,
375
+ "[1, 'ovodan', 0, '']": 213,
376
+ "[1, 'p\u0161tinski', 0, '']": 193,
377
+ "[1, 'reduze\u0107e', 0, '']": 243,
378
+ "[1, 'resuda', 0, '']": 192,
379
+ "[1, 'rokuplje', 0, '']": 107,
380
+ "[1, 'r\u0161ilac', 0, '']": 148,
381
+ "[1, 'sti', 0, '']": 41,
382
+ "[1, 'ti', 0, '']": 58,
383
+ "[1, 'ud', 0, '']": 194,
384
+ "[1, 'ugi', 0, '']": 124,
385
+ "[1, 'ugo', 0, '']": 258,
386
+ "[1, 'uriranje', 0, '']": 234,
387
+ "[1, 'uti', 0, '']": 142,
388
+ "[1, 'u\u0107an', 0, '']": 191,
389
+ "[1, 'u\u017eben', 0, '']": 123,
390
+ "[1, 'u\u017enost', 0, '']": 149,
391
+ "[1, '\u0161i', 0, '']": 190,
392
+ "[2, '', 0, '']": 5,
393
+ "[2, '', 0, 'o']": 179,
394
+ "[2, '', 11, 'prvotu\u017een']": 284,
395
+ "[2, '', 5, 'isposa']": 219,
396
+ "[2, '', 5, 'vodod']": 276,
397
+ "[2, '.o.o.', 0, '']": 93,
398
+ "[2, 'a', 0, '']": 43,
399
+ "[2, 'ac', 0, '']": 70,
400
+ "[2, 'aj', 0, '']": 15,
401
+ "[2, 'ak', 0, '']": 31,
402
+ "[2, 'am', 0, '']": 201,
403
+ "[2, 'an', 0, '']": 10,
404
+ "[2, 'an', 5, 'prav']": 195,
405
+ "[2, 'ao', 0, '']": 86,
406
+ "[2, 'ar', 0, '']": 18,
407
+ "[2, 'at', 0, '']": 69,
408
+ "[2, 'ati', 0, '']": 282,
409
+ "[2, 'av', 0, '']": 25,
410
+ "[2, 'biti', 0, '']": 13,
411
+ "[2, 'cati', 0, '']": 82,
412
+ "[2, 'ek', 0, '']": 139,
413
+ "[2, 'eko', 0, '']": 110,
414
+ "[2, 'eko', 3, '']": 174,
415
+ "[2, 'eti', 0, '']": 53,
416
+ "[2, 'g', 0, '']": 97,
417
+ "[2, 'g', 3, '']": 279,
418
+ "[2, 'ga', 0, '']": 112,
419
+ "[2, 'gati', 0, '']": 134,
420
+ "[2, 'go', 0, '']": 159,
421
+ "[2, 'go', 3, '']": 177,
422
+ "[2, 'ha', 0, '']": 154,
423
+ "[2, 'hteti', 0, '']": 223,
424
+ "[2, 'i', 0, '']": 39,
425
+ "[2, 'iti', 0, '']": 14,
426
+ "[2, 'j', 0, '']": 205,
427
+ "[2, 'ja', 0, '']": 241,
428
+ "[2, 'ji', 0, '']": 19,
429
+ "[2, 'k', 0, '']": 74,
430
+ "[2, 'ka', 0, '']": 62,
431
+ "[2, 'lik', 0, '']": 106,
432
+ "[2, 'lik', 3, '']": 233,
433
+ "[2, 'n', 0, '']": 245,
434
+ "[2, 'nuti', 0, '']": 140,
435
+ "[2, 'o', 0, '']": 75,
436
+ "[2, 'ok', 0, '']": 287,
437
+ "[2, 'oktor', 0, '']": 228,
438
+ "[2, 'on', 0, '']": 85,
439
+ "[2, 'sati', 0, '']": 127,
440
+ "[2, 'sko', 0, '']": 170,
441
+ "[2, 'sok', 0, '']": 92,
442
+ "[2, 'sok', 3, '']": 270,
443
+ "[2, 'sti', 0, '']": 103,
444
+ "[2, 'tac', 0, '']": 163,
445
+ "[2, 'tak', 0, '']": 84,
446
+ "[2, 'tan', 0, '']": 133,
447
+ "[2, 'tati', 0, '']": 171,
448
+ "[2, 'te', 0, '']": 160,
449
+ "[2, 'ti', 0, '']": 21,
450
+ "[2, 'ti', 1, 'b']": 59,
451
+ "[2, 'ti', 1, 'ht']": 37,
452
+ "[2, 'ti', 4, 'inten']": 268,
453
+ "[2, 'tko', 0, '']": 165,
454
+ "[2, 'vati', 0, '']": 65,
455
+ "[2, 'zak', 0, '']": 147,
456
+ "[2, 'zak', 3, '']": 166,
457
+ "[2, 'zati', 0, '']": 55,
458
+ "[2, 'zo', 0, '']": 244,
459
+ "[2, 'zu', 0, '']": 116,
460
+ "[2, '\u0107i', 0, '']": 45,
461
+ "[3, '', 0, '']": 30,
462
+ "[3, '', 0, 'o']": 51,
463
+ "[3, 'a', 0, '']": 220,
464
+ "[3, 'ac', 0, '']": 88,
465
+ "[3, 'aj', 0, '']": 35,
466
+ "[3, 'ajati', 0, '']": 216,
467
+ "[3, 'ak', 0, '']": 38,
468
+ "[3, 'an', 0, '']": 17,
469
+ "[3, 'anj', 0, '']": 231,
470
+ "[3, 'ao', 0, '']": 168,
471
+ "[3, 'ar', 0, '']": 89,
472
+ "[3, 'at', 0, '']": 248,
473
+ "[3, 'ati', 0, '']": 95,
474
+ "[3, 'av', 0, '']": 46,
475
+ "[3, 'avati', 0, '']": 178,
476
+ "[3, 'ba', 0, '']": 122,
477
+ "[3, 'biti', 0, '']": 96,
478
+ "[3, 'dak', 0, '']": 236,
479
+ "[3, 'deti', 0, '']": 131,
480
+ "[3, 'diti', 0, '']": 152,
481
+ "[3, 'e', 0, '']": 49,
482
+ "[3, 'ek', 0, '']": 67,
483
+ "[3, 'g', 0, '']": 169,
484
+ "[3, 'i', 0, '']": 208,
485
+ "[3, 'i', 0, 'o']": 175,
486
+ "[3, 'iti', 0, '']": 26,
487
+ "[3, 'ivati', 0, '']": 7,
488
+ "[3, 'ka', 0, '']": 42,
489
+ "[3, 'lac', 0, '']": 22,
490
+ "[3, 'len', 0, '']": 286,
491
+ "[3, 'li', 0, '']": 143,
492
+ "[3, 'li', 3, '']": 226,
493
+ "[3, 'lik', 0, '']": 156,
494
+ "[3, 'lik', 3, '']": 54,
495
+ "[3, 'lo', 0, '']": 94,
496
+ "[3, 'lo', 3, '']": 121,
497
+ "[3, 'mo', 0, '']": 256,
498
+ "[3, 'o', 0, '']": 34,
499
+ "[3, 'o', 3, '']": 80,
500
+ "[3, 'ovati', 0, '']": 40,
501
+ "[3, 'siti', 0, '']": 181,
502
+ "[3, 'slan', 0, '']": 249,
503
+ "[3, 'sok', 0, '']": 167,
504
+ "[3, 'st', 0, '']": 66,
505
+ "[3, 'sti', 0, '']": 109,
506
+ "[3, 't', 0, '']": 91,
507
+ "[3, 'ta', 0, '']": 199,
508
+ "[3, 'tac', 0, '']": 200,
509
+ "[3, 'tak', 0, '']": 253,
510
+ "[3, 'tak', 3, '']": 119,
511
+ "[3, 'tan', 0, '']": 100,
512
+ "[3, 'teti', 0, '']": 224,
513
+ "[3, 'ti', 0, '']": 188,
514
+ "[3, 'ti', 1, 'b']": 230,
515
+ "[3, 'ti', 1, 'sl']": 252,
516
+ "[3, 'titi', 0, '']": 126,
517
+ "[3, 'vati', 0, '']": 262,
518
+ "[3, 'vo', 0, '']": 81,
519
+ "[3, 'vojeru\u010dno', 0, '']": 185,
520
+ "[3, 'zac', 0, '']": 217,
521
+ "[3, 'zak', 0, '']": 117,
522
+ "[3, '\u0107i', 0, '']": 87,
523
+ "[3, '\u0161ta', 0, '']": 266,
524
+ "[4, '', 0, '']": 138,
525
+ "[4, '', 0, 'o']": 90,
526
+ "[4, 'ac', 0, '']": 79,
527
+ "[4, 'ak', 0, '']": 64,
528
+ "[4, 'am', 0, '']": 151,
529
+ "[4, 'an', 0, '']": 132,
530
+ "[4, 'an', 3, '']": 164,
531
+ "[4, 'ao', 0, '']": 11,
532
+ "[4, 'ar', 0, 'do']": 239,
533
+ "[4, 'at', 0, '']": 125,
534
+ "[4, 'av', 0, '']": 115,
535
+ "[4, 'biti', 0, '']": 56,
536
+ "[4, 'cati', 0, '']": 255,
537
+ "[4, 'citi', 0, '']": 184,
538
+ "[4, 'dak', 0, '']": 285,
539
+ "[4, 'deti', 0, '']": 128,
540
+ "[4, 'diti', 0, '']": 111,
541
+ "[4, 'g', 0, '']": 137,
542
+ "[4, 'gati', 0, '']": 186,
543
+ "[4, 'i', 0, 'o']": 158,
544
+ "[4, 'iti', 0, '']": 28,
545
+ "[4, 'ivati', 0, '']": 240,
546
+ "[4, 'k', 0, '']": 47,
547
+ "[4, 'lac', 0, '']": 113,
548
+ "[4, 'len', 0, '']": 280,
549
+ "[4, 'li', 0, '']": 44,
550
+ "[4, 'li', 3, '']": 50,
551
+ "[4, 'lja', 0, '']": 203,
552
+ "[4, 'mnogo', 0, '']": 136,
553
+ "[4, 'o', 0, '']": 78,
554
+ "[4, 'siti', 0, '']": 232,
555
+ "[4, 'sti', 0, '']": 214,
556
+ "[4, 'stiti', 0, '']": 187,
557
+ "[4, 'tak', 0, '']": 153,
558
+ "[4, 'tati', 0, '']": 221,
559
+ "[4, 'ti', 0, '']": 27,
560
+ "[4, 'ti', 2, '']": 267,
561
+ "[4, 'titi', 0, '']": 118,
562
+ "[4, 'vati', 0, '']": 120,
563
+ "[4, 'zak', 0, '']": 260,
564
+ "[4, 'ziti', 0, '']": 162,
565
+ "[4, '\u0107i', 0, '']": 172,
566
+ "[4, '\u0161ta', 0, '']": 102,
567
+ "[4, '\u0161to', 0, '']": 146,
568
+ "[5, '', 0, '']": 33,
569
+ "[5, 'an', 0, '']": 246,
570
+ "[5, 'an', 3, '']": 98,
571
+ "[5, 'ent', 0, '']": 251,
572
+ "[5, 'iti', 0, '']": 60,
573
+ "[5, 'ivati', 0, '']": 63,
574
+ "[5, 'k', 1, 'uz']": 212,
575
+ "[5, 'ki', 0, '']": 209,
576
+ "[5, 'lac', 0, '']": 141,
577
+ "[5, 'ovati', 0, '']": 215,
578
+ "[5, 'slati', 0, '']": 155,
579
+ "[5, 'sti', 0, '']": 235,
580
+ "[5, 'stiti', 0, '']": 129,
581
+ "[5, 'zniti', 0, '']": 277,
582
+ "[5, '\u0107i', 0, '']": 104,
583
+ "[5, '\u010dovek', 0, '']": 229,
584
+ "[6, 'ak', 0, '']": 32,
585
+ "[6, 'ao', 0, '']": 6,
586
+ "[6, 'ar', 0, '']": 250,
587
+ "[6, 'iti', 0, '']": 183,
588
+ "[6, 'o', 0, '']": 176,
589
+ "[6, 'ogo', 0, 'm']": 130,
590
+ "[6, 'r', 1, 'dob']": 145,
591
+ "[6, 'stiti', 0, '']": 237,
592
+ "[7, 'r', 1, 'dob']": 206
593
+ },
594
+ "layer_norm_eps": 1e-12,
595
+ "max_position_embeddings": 512,
596
+ "model_type": "electra",
597
+ "num_attention_heads": 12,
598
+ "num_hidden_layers": 12,
599
+ "pad_token_id": 0,
600
+ "position_embedding_type": "absolute",
601
+ "summary_activation": "gelu",
602
+ "summary_last_dropout": 0.1,
603
+ "summary_type": "first",
604
+ "summary_use_proj": true,
605
+ "torch_dtype": "float32",
606
+ "transformers_version": "4.40.2",
607
+ "type_vocab_size": 2,
608
+ "use_cache": true,
609
+ "vocab_size": 32000
610
+ }
model_args.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"adafactor_beta1": null, "adafactor_clip_threshold": 1.0, "adafactor_decay_rate": -0.8, "adafactor_eps": [1e-30, 0.001], "adafactor_relative_step": true, "adafactor_scale_parameter": true, "adafactor_warmup_init": true, "adam_betas": [0.9, 0.999], "adam_epsilon": 1e-08, "best_model_dir": "outputs/best_model", "cache_dir": "cache_dir/", "config": {}, "cosine_schedule_num_cycles": 0.5, "custom_layer_parameters": [], "custom_parameter_groups": [], "dataloader_num_workers": 0, "do_lower_case": false, "dynamic_quantize": false, "early_stopping_consider_epochs": false, "early_stopping_delta": 0, "early_stopping_metric": "eval_loss", "early_stopping_metric_minimize": true, "early_stopping_patience": 3, "encoding": null, "eval_batch_size": 100, "evaluate_during_training": false, "evaluate_during_training_silent": true, "evaluate_during_training_steps": 2000, "evaluate_during_training_verbose": false, "evaluate_each_epoch": true, "fp16": true, "gradient_accumulation_steps": 1, "learning_rate": 4e-05, "local_rank": -1, "logging_steps": 50, "loss_type": null, "loss_args": {}, "manual_seed": 64, "max_grad_norm": 1.0, "max_seq_length": 512, "model_name": "classla/bcms-bertic", "model_type": "electra", "multiprocessing_chunksize": -1, "n_gpu": 1, "no_cache": true, "no_save": false, "not_saved_args": [], "num_train_epochs": 20, "optimizer": "AdamW", "output_dir": "outputs/", "overwrite_output_dir": true, "polynomial_decay_schedule_lr_end": 1e-07, "polynomial_decay_schedule_power": 1.0, "process_count": 14, "quantized_model": false, "reprocess_input_data": true, "save_best_model": true, "save_eval_checkpoints": false, "save_model_every_epoch": false, "save_optimizer_and_scheduler": true, "save_steps": 2000, "scheduler": "linear_schedule_with_warmup", "silent": true, "skip_special_tokens": true, "tensorboard_dir": null, "thread_count": null, "tokenizer_name": null, "tokenizer_type": null, "train_batch_size": 8, "train_custom_parameters_only": false, "trust_remote_code": false, 
"use_cached_eval_features": false, "use_early_stopping": false, "use_hf_datasets": false, "use_multiprocessing": true, "use_multiprocessing_for_evaluation": true, "wandb_kwargs": {}, "wandb_project": null, "warmup_ratio": 0.06, "warmup_steps": 716, "weight_decay": 0.0, "model_class": "NERModel", "classification_report": false, "labels_list": ["[0, '', 0, '']", "[1, 'evar', 0, '']", "[1, 'a', 0, '']", "[1, '', 0, '']", "[1, 'oj', 0, '']", "[2, '', 0, '']", "[6, 'ao', 0, '']", "[3, 'ivati', 0, '']", "[0, 'ti', 0, '']", "[0, 'be', 0, '']", "[2, 'an', 0, '']", "[4, 'ao', 0, '']", "[1, 'i', 0, '']", "[2, 'biti', 0, '']", "[2, 'iti', 0, '']", "[2, 'aj', 0, '']", "[1, 'e', 0, '']", "[3, 'an', 0, '']", "[2, 'ar', 0, '']", "[2, 'ji', 0, '']", "[1, 'eti', 0, '']", "[2, 'ti', 0, '']", "[3, 'lac', 0, '']", "[1, 'iti', 0, '']", "[1, 'o', 0, '']", "[2, 'av', 0, '']", "[3, 'iti', 0, '']", "[4, 'ti', 0, '']", "[4, 'iti', 0, '']", "[1, 'ji', 0, '']", "[3, '', 0, '']", "[2, 'ak', 0, '']", "[6, 'ak', 0, '']", "[5, '', 0, '']", "[3, 'o', 0, '']", "[3, 'aj', 0, '']", "[0, 'ti', 1, 'ht']", "[2, 'ti', 1, 'ht']", "[3, 'ak', 0, '']", "[2, 'i', 0, '']", "[3, 'ovati', 0, '']", "[1, 'sti', 0, '']", "[3, 'ka', 0, '']", "[2, 'a', 0, '']", "[4, 'li', 0, '']", "[2, '\u0107i', 0, '']", "[3, 'av', 0, '']", "[4, 'k', 0, '']", "[0, 'j', 0, '']", "[3, 'e', 0, '']", "[4, 'li', 3, '']", "[3, '', 0, 'o']", "[1, 'aj', 0, '']", "[2, 'eti', 0, '']", "[3, 'lik', 3, '']", "[2, 'zati', 0, '']", "[4, 'biti', 0, '']", "[1, '', 0, 'on']", "[1, 'ti', 0, '']", "[2, 'ti', 1, 'b']", "[5, 'iti', 0, '']", "[1, 'j', 0, '']", "[2, 'ka', 0, '']", "[5, 'ivati', 0, '']", "[4, 'ak', 0, '']", "[2, 'vati', 0, '']", "[3, 'st', 0, '']", "[3, 'ek', 0, '']", "[0, 'a', 0, '']", "[2, 'at', 0, '']", "[2, 'ac', 0, '']", "[1, 'ica', 0, '']", "[1, 'i\u010dan', 0, '']", "[1, 'arta', 0, '']", "[2, 'k', 0, '']", "[2, 'o', 0, '']", "[1, 'astarski', 0, '']", "[1, 'ojan', 0, '']", "[4, 'o', 0, '']", "[4, 'ac', 0, '']", "[3, 'o', 3, '']", "[3, 
'vo', 0, '']", "[2, 'cati', 0, '']", "[1, 'ati', 0, '']", "[2, 'tak', 0, '']", "[2, 'on', 0, '']", "[2, 'ao', 0, '']", "[3, '\u0107i', 0, '']", "[3, 'ac', 0, '']", "[3, 'ar', 0, '']", "[4, '', 0, 'o']", "[3, 't', 0, '']", "[2, 'sok', 0, '']", "[2, '.o.o.', 0, '']", "[3, 'lo', 0, '']", "[3, 'ati', 0, '']", "[3, 'biti', 0, '']", "[2, 'g', 0, '']", "[5, 'an', 3, '']", "[1, 'istarski', 0, '']", "[3, 'tan', 0, '']", "[0, '', 3, '\u0161t']", "[4, '\u0161ta', 0, '']", "[2, 'sti', 0, '']", "[5, '\u0107i', 0, '']", "[1, 'an', 0, '']", "[2, 'lik', 0, '']", "[1, 'rokuplje', 0, '']", "[1, 'epublika', 0, '']", "[3, 'sti', 0, '']", "[2, 'eko', 0, '']", "[4, 'diti', 0, '']", "[2, 'ga', 0, '']", "[4, 'lac', 0, '']", "[0, 'i', 0, '']", "[4, 'av', 0, '']", "[2, 'zu', 0, '']", "[3, 'zak', 0, '']", "[4, 'titi', 0, '']", "[3, 'tak', 3, '']", "[4, 'vati', 0, '']", "[3, 'lo', 3, '']", "[3, 'ba', 0, '']", "[1, 'u\u017eben', 0, '']", "[1, 'ugi', 0, '']", "[4, 'at', 0, '']", "[3, 'titi', 0, '']", "[2, 'sati', 0, '']", "[4, 'deti', 0, '']", "[5, 'stiti', 0, '']", "[6, 'ogo', 0, 'm']", "[3, 'deti', 0, '']", "[4, 'an', 0, '']", "[2, 'tan', 0, '']", "[2, 'gati', 0, '']", "[1, 'i', 3, 'bi']", "[4, 'mnogo', 0, '']", "[4, 'g', 0, '']", "[4, '', 0, '']", "[2, 'ek', 0, '']", "[2, 'nuti', 0, '']", "[5, 'lac', 0, '']", "[1, 'uti', 0, '']", "[3, 'li', 0, '']", "[1, 'i\u010dno', 0, '']", "[6, 'r', 1, 'dob']", "[4, '\u0161to', 0, '']", "[2, 'zak', 0, '']", "[1, 'r\u0161ilac', 0, '']", "[1, 'u\u017enost', 0, '']", "[1, 'av', 0, '']", "[4, 'am', 0, '']", "[3, 'diti', 0, '']", "[4, 'tak', 0, '']", "[2, 'ha', 0, '']", "[5, 'slati', 0, '']", "[3, 'lik', 0, '']", "[1, 'ok', 0, '']", "[4, 'i', 0, 'o']", "[2, 'go', 0, '']", "[2, 'te', 0, '']", "[1, 'ak', 0, '']", "[4, 'ziti', 0, '']", "[2, 'tac', 0, '']", "[4, 'an', 3, '']", "[2, 'tko', 0, '']", "[2, 'zak', 3, '']", "[3, 'sok', 0, '']", "[3, 'ao', 0, '']", "[3, 'g', 0, '']", "[2, 'sko', 0, '']", "[2, 'tati', 0, '']", "[4, '\u0107i', 0, '']", "[1, 'a\u010dka', 0, 
'']", "[2, 'eko', 3, '']", "[3, 'i', 0, 'o']", "[6, 'o', 0, '']", "[2, 'go', 3, '']", "[3, 'avati', 0, '']", "[2, '', 0, 'o']", "[0, '', 3, 'sl']", "[3, 'siti', 0, '']", "[1, 'agistar', 0, '']", "[6, 'iti', 0, '']", "[4, 'citi', 0, '']", "[3, 'vojeru\u010dno', 0, '']", "[4, 'gati', 0, '']", "[4, 'stiti', 0, '']", "[3, 'ti', 0, '']", "[1, 'odina', 0, '']", "[1, '\u0161i', 0, '']", "[1, 'u\u0107an', 0, '']", "[1, 'resuda', 0, '']", "[1, 'p\u0161tinski', 0, '']", "[1, 'ud', 0, '']", "[2, 'an', 5, 'prav']", "[1, 'okat', 0, '']", "[1, 'en', 0, '']", "[1, 'oletan', 0, '']", "[3, 'ta', 0, '']", "[3, 'tac', 0, '']", "[2, 'am', 0, '']", "[1, 'izija', 0, '']", "[4, 'lja', 0, '']", "[1, 'omo\u0107nik', 0, '']", "[2, 'j', 0, '']", "[7, 'r', 1, 'dob']", "[1, 'ka', 0, '']", "[3, 'i', 0, '']", "[5, 'ki', 0, '']", "[1, 'n', 1, '']", "[1, 'ar\u0161al', 0, '']", "[5, 'k', 1, 'uz']", "[1, 'ovodan', 0, '']", "[4, 'sti', 0, '']", "[5, 'ovati', 0, '']", "[3, 'ajati', 0, '']", "[3, 'zac', 0, '']", "[0, 'k', 0, '']", "[2, '', 5, 'isposa']", "[3, 'a', 0, '']", "[4, 'tati', 0, '']", "[1, 'ana', 0, '']", "[2, 'hteti', 0, '']", "[3, 'teti', 0, '']", "[1, 'esor', 0, '']", "[3, 'li', 3, '']", "[0, 'ar', 0, '']", "[2, 'oktor', 0, '']", "[5, '\u010dovek', 0, '']", "[3, 'ti', 1, 'b']", "[3, 'anj', 0, '']", "[4, 'siti', 0, '']", "[2, 'lik', 3, '']", "[1, 'uriranje', 0, '']", "[5, 'sti', 0, '']", "[3, 'dak', 0, '']", "[6, 'stiti', 0, '']", "[0, 'ugi', 0, '']", "[4, 'ar', 0, 'do']", "[4, 'ivati', 0, '']", "[2, 'ja', 0, '']", "[1, 'amostalno', 0, '']", "[1, 'reduze\u0107e', 0, '']", "[2, 'zo', 0, '']", "[2, 'n', 0, '']", "[5, 'an', 0, '']", "[1, 'in', 0, '']", "[3, 'at', 0, '']", "[3, 'slan', 0, '']", "[6, 'ar', 0, '']", "[5, 'ent', 0, '']", "[3, 'ti', 1, 'sl']", "[3, 'tak', 0, '']", "[1, 'osno', 0, '']", "[4, 'cati', 0, '']", "[3, 'mo', 0, '']", "[1, 'efon', 0, '']", "[1, 'ugo', 0, '']", "[1, 'omiran', 0, '']", "[4, 'zak', 0, '']", "[1, 'ina', 0, '']", "[3, 'vati', 0, '']", "[1, 'oktor', 0, '']", 
"[0, 'an', 0, '']", "[0, 'ica', 0, '']", "[3, '\u0161ta', 0, '']", "[4, 'ti', 2, '']", "[2, 'ti', 4, 'inten']", "[0, '.', 0, '']", "[2, 'sok', 3, '']", "[0, 'botica', 0, '']", "[1, 'ektri\u010dan', 0, '']", "[1, 'i', 2, '']", "[0, 'ti', 1, 'b']", "[1, 'ena', 0, '']", "[2, '', 5, 'vodod']", "[5, 'zniti', 0, '']", "[0, 'n', 0, '']", "[2, 'g', 3, '']", "[4, 'len', 0, '']", "[1, 'omo\u0107je', 0, '']", "[2, 'ati', 0, '']", "[1, 'ospodin', 0, '']", "[2, '', 11, 'prvotu\u017een']", "[4, 'dak', 0, '']", "[3, 'len', 0, '']", "[2, 'ok', 0, '']"], "lazy_loading": false, "lazy_loading_start_line": 0, "onnx": false, "special_tokens_list": []}
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
vocab.txt ADDED
The diff for this file is too large to render. See raw diff