upload
Browse files- data/tamil_token_list/bpe_unigram500/bpe.model +3 -0
- data/tamil_token_list/bpe_unigram500/bpe.vocab +500 -0
- data/tamil_token_list/bpe_unigram500/tokens.txt +500 -0
- data/tamil_token_list/bpe_unigram500/train.txt +3 -0
- exp/asr_stats_raw_tamil_bpe500/train/feats_stats.npz +3 -0
- exp/asr_train_asr_raw_tamil_bpe500/config.yaml +711 -0
- exp/asr_train_asr_raw_tamil_bpe500/valid.acc.ave_10best.pth +3 -0
data/tamil_token_list/bpe_unigram500/bpe.model
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:d2f22a8b647d9a6b0c940c2c80a22b21f1c0426e6b58af1db863a64d17471663
|
3 |
+
size 248117
|
data/tamil_token_list/bpe_unigram500/bpe.vocab
ADDED
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<unk> 0
|
2 |
+
<s> 0
|
3 |
+
</s> 0
|
4 |
+
ம் -3.87044
|
5 |
+
ர -4.12927
|
6 |
+
ய -4.13727
|
7 |
+
ன் -4.22709
|
8 |
+
வ -4.24746
|
9 |
+
▁அ -4.27808
|
10 |
+
ர் -4.29732
|
11 |
+
ு -4.30314
|
12 |
+
க -4.40828
|
13 |
+
▁ப -4.42684
|
14 |
+
ன -4.43551
|
15 |
+
க்க -4.44243
|
16 |
+
ல -4.52444
|
17 |
+
த -4.54018
|
18 |
+
ா -4.54061
|
19 |
+
▁க -4.57332
|
20 |
+
ப -4.59428
|
21 |
+
து -4.59662
|
22 |
+
ட -4.59851
|
23 |
+
▁இ -4.65383
|
24 |
+
ம -4.65687
|
25 |
+
ல் -4.6572
|
26 |
+
▁ம -4.71532
|
27 |
+
த்த -4.74682
|
28 |
+
▁த -4.77288
|
29 |
+
ும் -4.79391
|
30 |
+
ை -4.8396
|
31 |
+
ோ -4.85089
|
32 |
+
க் -4.85482
|
33 |
+
தி -4.85975
|
34 |
+
ி -4.87246
|
35 |
+
ே -4.90293
|
36 |
+
ிய -4.95176
|
37 |
+
கள் -5.01702
|
38 |
+
▁வி -5.05469
|
39 |
+
ந்த -5.05546
|
40 |
+
ரி -5.06237
|
41 |
+
டி -5.07385
|
42 |
+
▁ந -5.07764
|
43 |
+
ப்ப -5.13187
|
44 |
+
டு -5.13874
|
45 |
+
ட்ட -5.1546
|
46 |
+
▁வ -5.16901
|
47 |
+
▁உ -5.17494
|
48 |
+
லை -5.18783
|
49 |
+
▁ச -5.18866
|
50 |
+
ெ -5.19107
|
51 |
+
ச -5.2616
|
52 |
+
▁ஆ -5.26932
|
53 |
+
் -5.28406
|
54 |
+
வி -5.3086
|
55 |
+
ண -5.31872
|
56 |
+
ப் -5.32957
|
57 |
+
ள -5.35586
|
58 |
+
ீ -5.36276
|
59 |
+
மை -5.37329
|
60 |
+
த்து -5.38515
|
61 |
+
ரு -5.39345
|
62 |
+
று -5.42407
|
63 |
+
ில் -5.43067
|
64 |
+
க்கு -5.45401
|
65 |
+
ார் -5.46149
|
66 |
+
▁எ -5.46518
|
67 |
+
யில் -5.4871
|
68 |
+
ூ -5.517
|
69 |
+
ப்பு -5.52739
|
70 |
+
த்தில் -5.54703
|
71 |
+
ஸ் -5.56052
|
72 |
+
ட் -5.56358
|
73 |
+
வு -5.57145
|
74 |
+
▁பா -5.58584
|
75 |
+
த் -5.60489
|
76 |
+
ங்க -5.61493
|
77 |
+
னை -5.62129
|
78 |
+
ற்ற -5.64751
|
79 |
+
ட்டு -5.70773
|
80 |
+
ாக -5.71181
|
81 |
+
▁கு -5.71224
|
82 |
+
டை -5.75916
|
83 |
+
▁நி -5.76097
|
84 |
+
யா -5.78338
|
85 |
+
▁ஒரு -5.78887
|
86 |
+
லி -5.79061
|
87 |
+
ின் -5.80269
|
88 |
+
களை -5.80881
|
89 |
+
ச்ச -5.81518
|
90 |
+
ழ -5.82018
|
91 |
+
வா -5.84589
|
92 |
+
ள் -5.85155
|
93 |
+
கா -5.85356
|
94 |
+
ந்து -5.85863
|
95 |
+
▁சி -5.86533
|
96 |
+
ான -5.87477
|
97 |
+
▁நா -5.88737
|
98 |
+
▁கா -5.89804
|
99 |
+
▁மு -5.90193
|
100 |
+
ரை -5.92262
|
101 |
+
னி -5.92622
|
102 |
+
ற -5.92988
|
103 |
+
த்தி -5.93022
|
104 |
+
றி -5.93883
|
105 |
+
சி -5.94352
|
106 |
+
▁போ -5.95741
|
107 |
+
▁மற்றும் -5.9618
|
108 |
+
வை -5.97275
|
109 |
+
ரா -5.97964
|
110 |
+
ங்கள் -5.98277
|
111 |
+
ளி -6.00061
|
112 |
+
▁இந்த -6.05367
|
113 |
+
ந -6.05535
|
114 |
+
ண்ட -6.06441
|
115 |
+
கு -6.07105
|
116 |
+
ால் -6.07582
|
117 |
+
▁உள்ள -6.08829
|
118 |
+
யை -6.0896
|
119 |
+
▁பு -6.10127
|
120 |
+
▁வா -6.13494
|
121 |
+
கள -6.1569
|
122 |
+
▁வர -6.15889
|
123 |
+
பா -6.1648
|
124 |
+
த்தை -6.17401
|
125 |
+
▁மா -6.1905
|
126 |
+
ணி -6.19564
|
127 |
+
றை -6.19628
|
128 |
+
லா -6.19957
|
129 |
+
▁ -6.20532
|
130 |
+
வர் -6.20635
|
131 |
+
மா -6.21704
|
132 |
+
னர் -6.22212
|
133 |
+
மான -6.22479
|
134 |
+
மாக -6.22947
|
135 |
+
ொ -6.23706
|
136 |
+
கை -6.24853
|
137 |
+
ப்பட்ட -6.26247
|
138 |
+
▁கோ -6.26486
|
139 |
+
▁என -6.2664
|
140 |
+
ுள்ள -6.27867
|
141 |
+
ப்பி -6.32496
|
142 |
+
▁வே -6.32557
|
143 |
+
ல்ல -6.32842
|
144 |
+
ட்டி -6.32937
|
145 |
+
ழு -6.33209
|
146 |
+
▁சு -6.33458
|
147 |
+
ச் -6.33538
|
148 |
+
பி -6.36472
|
149 |
+
வர -6.37184
|
150 |
+
ண் -6.37754
|
151 |
+
▁கி -6.39428
|
152 |
+
மி -6.39711
|
153 |
+
யாக -6.40591
|
154 |
+
தை -6.41184
|
155 |
+
கி -6.41914
|
156 |
+
▁ர -6.41919
|
157 |
+
ழ் -6.42
|
158 |
+
▁ஏ -6.42523
|
159 |
+
ன்ற -6.42654
|
160 |
+
பு -6.42865
|
161 |
+
ரிய -6.43784
|
162 |
+
ற்க -6.44274
|
163 |
+
ுக்கு -6.44884
|
164 |
+
வே -6.45714
|
165 |
+
யின் -6.46464
|
166 |
+
களுக்கு -6.47762
|
167 |
+
▁தி -6.48391
|
168 |
+
தா -6.49654
|
169 |
+
▁கொ -6.4977
|
170 |
+
ாய் -6.50183
|
171 |
+
▁செய்த -6.50263
|
172 |
+
▁பிர -6.5106
|
173 |
+
▁நீ -6.51382
|
174 |
+
ுடன் -6.51795
|
175 |
+
▁என்று -6.5219
|
176 |
+
க்கும் -6.52537
|
177 |
+
ிற -6.52986
|
178 |
+
▁சா -6.54434
|
179 |
+
▁திரு -6.54475
|
180 |
+
ர்கள் -6.5505
|
181 |
+
ஜ -6.55081
|
182 |
+
▁ஒ -6.55761
|
183 |
+
யி -6.55922
|
184 |
+
ளை -6.55994
|
185 |
+
ற்ப -6.56595
|
186 |
+
▁தொ -6.57938
|
187 |
+
களில் -6.59097
|
188 |
+
ற்று -6.59598
|
189 |
+
களின் -6.59666
|
190 |
+
த்தின் -6.59878
|
191 |
+
▁செய்ய -6.60091
|
192 |
+
ப்பா -6.61027
|
193 |
+
ப்பட -6.61263
|
194 |
+
▁செ -6.61541
|
195 |
+
ையும் -6.62503
|
196 |
+
▁வெளி -6.63969
|
197 |
+
▁பி -6.64248
|
198 |
+
மு -6.64662
|
199 |
+
▁ஜ -6.66008
|
200 |
+
▁அவர் -6.66717
|
201 |
+
▁து -6.67047
|
202 |
+
சா -6.67416
|
203 |
+
நா -6.68272
|
204 |
+
▁வீ -6.68809
|
205 |
+
தாக -6.69035
|
206 |
+
▁தே -6.70072
|
207 |
+
ச்சி -6.70467
|
208 |
+
ண்டு -6.71209
|
209 |
+
ையில் -6.71442
|
210 |
+
க்கி -6.7161
|
211 |
+
ங்கள -6.71873
|
212 |
+
ண்ண -6.73262
|
213 |
+
▁ஊ -6.73511
|
214 |
+
▁தொடர் -6.74687
|
215 |
+
க்கப்பட்ட -6.75044
|
216 |
+
▁பொ -6.76462
|
217 |
+
▁முடி -6.79575
|
218 |
+
ங்கு -6.80731
|
219 |
+
▁தா -6.81175
|
220 |
+
ாளர் -6.81407
|
221 |
+
▁என்ற -6.81733
|
222 |
+
▁கட -6.83544
|
223 |
+
ங்கி -6.83586
|
224 |
+
▁தெ -6.8386
|
225 |
+
சு -6.84751
|
226 |
+
ூர் -6.84904
|
227 |
+
ழி -6.86154
|
228 |
+
▁அவ -6.86801
|
229 |
+
▁கொண்ட -6.88231
|
230 |
+
ுகின்ற -6.88858
|
231 |
+
யான -6.89115
|
232 |
+
▁தீ -6.8958
|
233 |
+
▁பல -6.91218
|
234 |
+
ழை -6.91711
|
235 |
+
▁பட -6.91793
|
236 |
+
▁வெ -6.93106
|
237 |
+
▁ட -6.9366
|
238 |
+
னா -6.93742
|
239 |
+
▁இந்திய -6.94475
|
240 |
+
ஷ -6.94529
|
241 |
+
▁கரு -6.96378
|
242 |
+
▁இருக்க -6.96633
|
243 |
+
ஞ்ச -6.96821
|
244 |
+
ுவத -6.99153
|
245 |
+
ணை -6.99332
|
246 |
+
▁இது -7.01376
|
247 |
+
ிருந்த -7.01534
|
248 |
+
வில் -7.01551
|
249 |
+
ங்களை -7.03261
|
250 |
+
▁வேண்டும் -7.04515
|
251 |
+
▁சே -7.04934
|
252 |
+
வும் -7.05493
|
253 |
+
▁சொ -7.06271
|
254 |
+
▁மூ -7.06274
|
255 |
+
ுள்ளது -7.067
|
256 |
+
▁நிலைய -7.06892
|
257 |
+
▁கூற -7.07001
|
258 |
+
▁இருந்த -7.07546
|
259 |
+
▁பகுதி -7.07692
|
260 |
+
ிலும் -7.08703
|
261 |
+
யே -7.09483
|
262 |
+
ங்களில் -7.09724
|
263 |
+
▁மாவட்ட -7.10378
|
264 |
+
▁கே -7.1047
|
265 |
+
▁ஓ -7.10895
|
266 |
+
▁அதிக -7.11564
|
267 |
+
▁மே -7.12719
|
268 |
+
▁வழங்க -7.13735
|
269 |
+
▁என்பது -7.13968
|
270 |
+
ருக்கு -7.14448
|
271 |
+
ராக -7.16524
|
272 |
+
ஷ் -7.1717
|
273 |
+
▁பெற்ற -7.17475
|
274 |
+
ப்படும் -7.18419
|
275 |
+
▁பணி -7.19036
|
276 |
+
▁கால -7.2038
|
277 |
+
▁முதல் -7.20633
|
278 |
+
▁அரசு -7.20899
|
279 |
+
ள்ள -7.21838
|
280 |
+
▁மீ -7.21849
|
281 |
+
த்திற்கு -7.21918
|
282 |
+
▁வந்த -7.22096
|
283 |
+
▁ஒன்ற -7.22952
|
284 |
+
ொரு -7.22975
|
285 |
+
▁பெண் -7.23446
|
286 |
+
லாம் -7.23941
|
287 |
+
▁ல -7.24261
|
288 |
+
ா் -7.24865
|
289 |
+
சை -7.25076
|
290 |
+
▁என்ன -7.25329
|
291 |
+
ாட்சி -7.25517
|
292 |
+
▁இட -7.26466
|
293 |
+
கிறார் -7.27583
|
294 |
+
▁ஐ -7.27681
|
295 |
+
ெய் -7.27688
|
296 |
+
ந்தி -7.27869
|
297 |
+
▁குறித்த -7.27869
|
298 |
+
▁இருந்து -7.28108
|
299 |
+
▁மிக -7.28345
|
300 |
+
தற்கு -7.28403
|
301 |
+
▁ஹ -7.28416
|
302 |
+
▁எதிர் -7.28474
|
303 |
+
றிய -7.28782
|
304 |
+
▁சில -7.28902
|
305 |
+
▁தமிழ் -7.2891
|
306 |
+
கிறது -7.29849
|
307 |
+
▁ஸ் -7.30002
|
308 |
+
ண்டி -7.30852
|
309 |
+
ட்டை -7.31957
|
310 |
+
▁வை -7.32733
|
311 |
+
ிடம் -7.33713
|
312 |
+
▁எழு -7.34024
|
313 |
+
ப்படுகிறது -7.34172
|
314 |
+
ரோ -7.34476
|
315 |
+
ுள்ளார் -7.34513
|
316 |
+
▁பெரு -7.34999
|
317 |
+
▁முன்ன -7.35199
|
318 |
+
ிருக்க -7.35286
|
319 |
+
ஞ -7.36125
|
320 |
+
▁நேர -7.36273
|
321 |
+
▁பொது -7.36711
|
322 |
+
தே -7.37161
|
323 |
+
▁அறிவ -7.38593
|
324 |
+
▁என்பத -7.39721
|
325 |
+
ாமல் -7.39891
|
326 |
+
▁இவர் -7.39988
|
327 |
+
▁பிற -7.40296
|
328 |
+
▁அந்த -7.40728
|
329 |
+
வ் -7.41051
|
330 |
+
▁வழக்க -7.41083
|
331 |
+
▁மேலும் -7.43614
|
332 |
+
களும் -7.44914
|
333 |
+
ந்திர -7.44944
|
334 |
+
தான் -7.4503
|
335 |
+
▁மக்கள் -7.45142
|
336 |
+
▁முறை -7.45363
|
337 |
+
▁கூட -7.46341
|
338 |
+
ரின் -7.46447
|
339 |
+
ங் -7.46688
|
340 |
+
▁கூட்ட -7.47067
|
341 |
+
ுகிறது -7.47087
|
342 |
+
▁ரா -7.47421
|
343 |
+
▁மேற் -7.47538
|
344 |
+
தில் -7.47697
|
345 |
+
▁வைத்த -7.4793
|
346 |
+
▁சீ -7.47993
|
347 |
+
ப்படுத்த -7.48113
|
348 |
+
போது -7.48617
|
349 |
+
▁பேர -7.48624
|
350 |
+
▁இணை -7.48782
|
351 |
+
▁அல்லது -7.49394
|
352 |
+
ற் -7.49844
|
353 |
+
முறை -7.50406
|
354 |
+
▁தெரிவித்த -7.50857
|
355 |
+
▁யா -7.5089
|
356 |
+
▁வழி -7.51094
|
357 |
+
கொண்ட -7.51245
|
358 |
+
▁சட்ட -7.51904
|
359 |
+
தால் -7.52212
|
360 |
+
ஸ -7.52244
|
361 |
+
▁தோ -7.525
|
362 |
+
▁கொண்டு -7.52518
|
363 |
+
ாகவும் -7.52781
|
364 |
+
▁சென்ற -7.52825
|
365 |
+
▁காரண -7.53334
|
366 |
+
▁மீது -7.53902
|
367 |
+
நிலை -7.54172
|
368 |
+
▁காண -7.54398
|
369 |
+
யில -7.55989
|
370 |
+
▁பிரி -7.56593
|
371 |
+
▁சூ -7.57195
|
372 |
+
ட்டில் -7.58116
|
373 |
+
▁ஆகிய -7.58436
|
374 |
+
▁பற்றி -7.59031
|
375 |
+
▁போன்ற -7.5934
|
376 |
+
▁கட்சி -7.59906
|
377 |
+
ினார் -7.60018
|
378 |
+
▁நடவடிக்கை -7.60463
|
379 |
+
▁இலங்கை -7.60643
|
380 |
+
▁மருத்துவ -7.60875
|
381 |
+
▁நடத்த -7.61029
|
382 |
+
ாவில் -7.61041
|
383 |
+
ாவின் -7.61265
|
384 |
+
ஜி -7.61488
|
385 |
+
▁இன்று -7.6162
|
386 |
+
ப்போ -7.61978
|
387 |
+
▁ஈ -7.62535
|
388 |
+
▁நிறுவன -7.62652
|
389 |
+
▁தமிழக -7.63611
|
390 |
+
▁தனது -7.63884
|
391 |
+
▁அரச -7.64101
|
392 |
+
▁பயன்படுத்த -7.6434
|
393 |
+
▁நகர -7.64586
|
394 |
+
▁மக்கள -7.64844
|
395 |
+
▁உலக -7.64982
|
396 |
+
ிலிருந்து -7.65085
|
397 |
+
▁டி -7.65198
|
398 |
+
ிற்கு -7.66416
|
399 |
+
▁மூலம் -7.6672
|
400 |
+
▁திட்ட -7.68222
|
401 |
+
▁இருப்ப -7.6833
|
402 |
+
ுதல் -7.69123
|
403 |
+
ஹ -7.69569
|
404 |
+
வில்லை -7.69897
|
405 |
+
▁முக்கிய -7.70226
|
406 |
+
▁மத்திய -7.72489
|
407 |
+
▁புதிய -7.73282
|
408 |
+
▁இயக்க -7.74027
|
409 |
+
ினர் -7.74365
|
410 |
+
▁குறை -7.74394
|
411 |
+
▁தாக்க -7.74436
|
412 |
+
▁கொள்ள -7.75062
|
413 |
+
ாலும் -7.75409
|
414 |
+
▁முதல -7.77869
|
415 |
+
ுடைய -7.7803
|
416 |
+
▁மாற்ற -7.7815
|
417 |
+
▁சென்னை -7.79497
|
418 |
+
▁பெயர -7.79743
|
419 |
+
▁குறிப்பிடத்தக்க -7.80186
|
420 |
+
▁ஆனால் -7.81094
|
421 |
+
▁தமிழ -7.81685
|
422 |
+
▁மாநில -7.81766
|
423 |
+
கொள்ள -7.82702
|
424 |
+
ங்களுக்கு -7.8307
|
425 |
+
▁பெற -7.83573
|
426 |
+
▁அமெரிக்க -7.83592
|
427 |
+
▁அமைந்துள்ள -7.8389
|
428 |
+
ின்றன -7.83899
|
429 |
+
ஃ -7.85081
|
430 |
+
▁செல்ல -7.85554
|
431 |
+
▁உயர் -7.86073
|
432 |
+
▁சம்ப -7.86599
|
433 |
+
▁செயல் -7.86752
|
434 |
+
▁அளவ -7.87314
|
435 |
+
ப்பட்டுள்ளது -7.87518
|
436 |
+
▁உயிர -7.87775
|
437 |
+
▁அமைப்ப -7.88032
|
438 |
+
▁ராஜ -7.89252
|
439 |
+
▁தேசிய -7.8962
|
440 |
+
▁போராட்ட -7.90548
|
441 |
+
▁தலைவர் -7.91205
|
442 |
+
▁குழந்தை -7.91331
|
443 |
+
▁தகவல் -7.91413
|
444 |
+
▁தேர்தல -7.91636
|
445 |
+
▁பள்ளி -7.93824
|
446 |
+
ாவது -7.95264
|
447 |
+
▁அருகே -7.95644
|
448 |
+
ிருப்ப -7.95815
|
449 |
+
▁அனைத்து -7.96976
|
450 |
+
▁இரண்டு -7.97462
|
451 |
+
▁விசாரணை -7.97574
|
452 |
+
▁அரசியல் -7.98094
|
453 |
+
▁சேர்ந்த -7.99905
|
454 |
+
▁அதிகாரி -8.00873
|
455 |
+
▁உருவாக்க -8.02353
|
456 |
+
▁சிறப்ப -8.02428
|
457 |
+
▁மாணவர் -8.02627
|
458 |
+
▁மொழி -8.03518
|
459 |
+
▁இவ்வ -8.04066
|
460 |
+
ினால் -8.0436
|
461 |
+
▁தொழில -8.04512
|
462 |
+
▁கிராம -8.04591
|
463 |
+
▁வெற்றி -8.05301
|
464 |
+
▁செயல -8.07027
|
465 |
+
▁சமூக -8.07552
|
466 |
+
▁கல்வி -8.0763
|
467 |
+
▁பெரிய -8.07873
|
468 |
+
▁அடுத்த -8.08803
|
469 |
+
▁இல்லை -8.09175
|
470 |
+
▁தற்போது -8.10018
|
471 |
+
▁குடும்ப -8.1147
|
472 |
+
▁மனித -8.1159
|
473 |
+
▁அவரது -8.12498
|
474 |
+
▁ஆட்சி -8.14156
|
475 |
+
▁ஆகும் -8.14403
|
476 |
+
படுத்த -8.15026
|
477 |
+
▁உதவி -8.15509
|
478 |
+
▁வளர் -8.15571
|
479 |
+
▁ஏற்பட்ட -8.16072
|
480 |
+
▁நீதிமன்ற -8.16564
|
481 |
+
எ -8.16767
|
482 |
+
▁எடுக்க -8.17318
|
483 |
+
▁உடல -8.17894
|
484 |
+
▁அனுமதி -8.20644
|
485 |
+
ௌ -8.48685
|
486 |
+
அ -9.04635
|
487 |
+
ஐ -9.05132
|
488 |
+
ஏ -9.22496
|
489 |
+
ஆ -9.28605
|
490 |
+
இ -9.3993
|
491 |
+
உ -10.1828
|
492 |
+
ஒ -10.6098
|
493 |
+
ஓ -10.6962
|
494 |
+
ஈ -11.5349
|
495 |
+
ங -11.8865
|
496 |
+
ஊ -12.3722
|
497 |
+
ஔ -15.6257
|
498 |
+
௸ -15.6258
|
499 |
+
ஶ -15.6258
|
500 |
+
ௐ -15.6258
|
data/tamil_token_list/bpe_unigram500/tokens.txt
ADDED
@@ -0,0 +1,500 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
<blank>
|
2 |
+
<unk>
|
3 |
+
ம்
|
4 |
+
ர
|
5 |
+
ய
|
6 |
+
ன்
|
7 |
+
வ
|
8 |
+
▁அ
|
9 |
+
ர்
|
10 |
+
ு
|
11 |
+
க
|
12 |
+
▁ப
|
13 |
+
ன
|
14 |
+
க்க
|
15 |
+
ல
|
16 |
+
த
|
17 |
+
ா
|
18 |
+
▁க
|
19 |
+
ப
|
20 |
+
து
|
21 |
+
ட
|
22 |
+
▁இ
|
23 |
+
ம
|
24 |
+
ல்
|
25 |
+
▁ம
|
26 |
+
த்த
|
27 |
+
▁த
|
28 |
+
ும்
|
29 |
+
ை
|
30 |
+
ோ
|
31 |
+
க்
|
32 |
+
தி
|
33 |
+
ி
|
34 |
+
ே
|
35 |
+
ிய
|
36 |
+
கள்
|
37 |
+
▁வி
|
38 |
+
ந்த
|
39 |
+
ரி
|
40 |
+
டி
|
41 |
+
▁ந
|
42 |
+
ப்ப
|
43 |
+
டு
|
44 |
+
ட்ட
|
45 |
+
▁வ
|
46 |
+
▁உ
|
47 |
+
லை
|
48 |
+
▁ச
|
49 |
+
ெ
|
50 |
+
ச
|
51 |
+
▁ஆ
|
52 |
+
்
|
53 |
+
வி
|
54 |
+
ண
|
55 |
+
ப்
|
56 |
+
ள
|
57 |
+
ீ
|
58 |
+
மை
|
59 |
+
த்து
|
60 |
+
ரு
|
61 |
+
று
|
62 |
+
ில்
|
63 |
+
க்கு
|
64 |
+
ார்
|
65 |
+
▁எ
|
66 |
+
யில்
|
67 |
+
ூ
|
68 |
+
ப்பு
|
69 |
+
த்தில்
|
70 |
+
ஸ்
|
71 |
+
ட்
|
72 |
+
வு
|
73 |
+
▁பா
|
74 |
+
த்
|
75 |
+
ங்க
|
76 |
+
னை
|
77 |
+
ற்ற
|
78 |
+
ட்டு
|
79 |
+
ாக
|
80 |
+
▁கு
|
81 |
+
டை
|
82 |
+
▁நி
|
83 |
+
யா
|
84 |
+
▁ஒரு
|
85 |
+
லி
|
86 |
+
ின்
|
87 |
+
களை
|
88 |
+
ச்ச
|
89 |
+
ழ
|
90 |
+
வா
|
91 |
+
ள்
|
92 |
+
கா
|
93 |
+
ந்து
|
94 |
+
▁சி
|
95 |
+
ான
|
96 |
+
▁நா
|
97 |
+
▁கா
|
98 |
+
▁மு
|
99 |
+
ரை
|
100 |
+
னி
|
101 |
+
ற
|
102 |
+
த்தி
|
103 |
+
றி
|
104 |
+
சி
|
105 |
+
▁போ
|
106 |
+
▁மற்றும்
|
107 |
+
வை
|
108 |
+
ரா
|
109 |
+
ங்கள்
|
110 |
+
ளி
|
111 |
+
▁இந்த
|
112 |
+
ந
|
113 |
+
ண்ட
|
114 |
+
கு
|
115 |
+
ால்
|
116 |
+
▁உள்ள
|
117 |
+
யை
|
118 |
+
▁பு
|
119 |
+
▁வா
|
120 |
+
கள
|
121 |
+
▁வர
|
122 |
+
பா
|
123 |
+
த்தை
|
124 |
+
▁மா
|
125 |
+
ணி
|
126 |
+
றை
|
127 |
+
லா
|
128 |
+
▁
|
129 |
+
வர்
|
130 |
+
மா
|
131 |
+
னர்
|
132 |
+
மான
|
133 |
+
மாக
|
134 |
+
ொ
|
135 |
+
கை
|
136 |
+
ப்பட்ட
|
137 |
+
▁கோ
|
138 |
+
▁என
|
139 |
+
ுள்ள
|
140 |
+
ப்பி
|
141 |
+
▁வே
|
142 |
+
ல்ல
|
143 |
+
ட்டி
|
144 |
+
ழு
|
145 |
+
▁சு
|
146 |
+
ச்
|
147 |
+
பி
|
148 |
+
வர
|
149 |
+
ண்
|
150 |
+
▁கி
|
151 |
+
மி
|
152 |
+
யாக
|
153 |
+
தை
|
154 |
+
கி
|
155 |
+
▁ர
|
156 |
+
ழ்
|
157 |
+
▁ஏ
|
158 |
+
ன்ற
|
159 |
+
பு
|
160 |
+
ரிய
|
161 |
+
ற்க
|
162 |
+
ுக்கு
|
163 |
+
வே
|
164 |
+
யின்
|
165 |
+
களுக்கு
|
166 |
+
▁தி
|
167 |
+
தா
|
168 |
+
▁கொ
|
169 |
+
ாய்
|
170 |
+
▁செய்த
|
171 |
+
▁பிர
|
172 |
+
▁நீ
|
173 |
+
ுடன்
|
174 |
+
▁என்று
|
175 |
+
க்கும்
|
176 |
+
ிற
|
177 |
+
▁சா
|
178 |
+
▁திரு
|
179 |
+
ர்கள்
|
180 |
+
ஜ
|
181 |
+
▁ஒ
|
182 |
+
யி
|
183 |
+
ளை
|
184 |
+
ற்ப
|
185 |
+
▁தொ
|
186 |
+
களில்
|
187 |
+
ற்று
|
188 |
+
களின்
|
189 |
+
த்தின்
|
190 |
+
▁செய்ய
|
191 |
+
ப்பா
|
192 |
+
ப்பட
|
193 |
+
▁செ
|
194 |
+
ையும்
|
195 |
+
▁வெளி
|
196 |
+
▁பி
|
197 |
+
மு
|
198 |
+
▁ஜ
|
199 |
+
▁அவர்
|
200 |
+
▁து
|
201 |
+
சா
|
202 |
+
நா
|
203 |
+
▁வீ
|
204 |
+
தாக
|
205 |
+
▁தே
|
206 |
+
ச்சி
|
207 |
+
ண்டு
|
208 |
+
ையில்
|
209 |
+
க்கி
|
210 |
+
ங்கள
|
211 |
+
ண்ண
|
212 |
+
▁ஊ
|
213 |
+
▁தொடர்
|
214 |
+
க்கப்பட்ட
|
215 |
+
▁பொ
|
216 |
+
▁முடி
|
217 |
+
ங்கு
|
218 |
+
▁தா
|
219 |
+
ாளர்
|
220 |
+
▁என்ற
|
221 |
+
▁கட
|
222 |
+
ங்கி
|
223 |
+
▁தெ
|
224 |
+
சு
|
225 |
+
ூர்
|
226 |
+
ழி
|
227 |
+
▁அவ
|
228 |
+
▁கொண்ட
|
229 |
+
ுகின்ற
|
230 |
+
யான
|
231 |
+
▁தீ
|
232 |
+
▁பல
|
233 |
+
ழை
|
234 |
+
▁பட
|
235 |
+
▁வெ
|
236 |
+
▁ட
|
237 |
+
னா
|
238 |
+
▁இந்திய
|
239 |
+
ஷ
|
240 |
+
▁கரு
|
241 |
+
▁இருக்க
|
242 |
+
ஞ்ச
|
243 |
+
ுவத
|
244 |
+
ணை
|
245 |
+
▁இது
|
246 |
+
ிருந்த
|
247 |
+
வில்
|
248 |
+
ங்களை
|
249 |
+
▁வேண்டும்
|
250 |
+
▁சே
|
251 |
+
வும்
|
252 |
+
▁சொ
|
253 |
+
▁மூ
|
254 |
+
ுள்ளது
|
255 |
+
▁நிலைய
|
256 |
+
▁கூற
|
257 |
+
▁இருந்த
|
258 |
+
▁பகுதி
|
259 |
+
ிலும்
|
260 |
+
யே
|
261 |
+
ங்களில்
|
262 |
+
▁மாவட்ட
|
263 |
+
▁கே
|
264 |
+
▁ஓ
|
265 |
+
▁அதிக
|
266 |
+
▁மே
|
267 |
+
▁வழங்க
|
268 |
+
▁என்பது
|
269 |
+
ருக்கு
|
270 |
+
ராக
|
271 |
+
ஷ்
|
272 |
+
▁பெற்ற
|
273 |
+
ப்படும்
|
274 |
+
▁பணி
|
275 |
+
▁கால
|
276 |
+
▁முதல்
|
277 |
+
▁அரசு
|
278 |
+
ள்ள
|
279 |
+
▁மீ
|
280 |
+
த்திற்கு
|
281 |
+
▁வந்த
|
282 |
+
▁ஒன்ற
|
283 |
+
ொரு
|
284 |
+
▁பெண்
|
285 |
+
லாம்
|
286 |
+
▁ல
|
287 |
+
ா்
|
288 |
+
சை
|
289 |
+
▁என்ன
|
290 |
+
ாட்சி
|
291 |
+
▁இட
|
292 |
+
கிறார்
|
293 |
+
▁ஐ
|
294 |
+
ெய்
|
295 |
+
ந்தி
|
296 |
+
▁குறித்த
|
297 |
+
▁இருந்து
|
298 |
+
▁மிக
|
299 |
+
தற்கு
|
300 |
+
▁ஹ
|
301 |
+
▁எதிர்
|
302 |
+
றிய
|
303 |
+
▁சில
|
304 |
+
▁தமிழ்
|
305 |
+
கிறது
|
306 |
+
▁ஸ்
|
307 |
+
ண்டி
|
308 |
+
ட்டை
|
309 |
+
▁வை
|
310 |
+
ிடம்
|
311 |
+
▁எழு
|
312 |
+
ப்படுகிறது
|
313 |
+
ரோ
|
314 |
+
ுள்ளார்
|
315 |
+
▁பெரு
|
316 |
+
▁முன்ன
|
317 |
+
ிருக்க
|
318 |
+
ஞ
|
319 |
+
▁நேர
|
320 |
+
▁பொது
|
321 |
+
தே
|
322 |
+
▁அறிவ
|
323 |
+
▁என்பத
|
324 |
+
ாமல்
|
325 |
+
▁இவர்
|
326 |
+
▁பிற
|
327 |
+
▁அந்த
|
328 |
+
வ்
|
329 |
+
▁வழக்க
|
330 |
+
▁மேலும்
|
331 |
+
களும்
|
332 |
+
ந்திர
|
333 |
+
தான்
|
334 |
+
▁மக்கள்
|
335 |
+
▁முறை
|
336 |
+
▁கூட
|
337 |
+
ரின்
|
338 |
+
ங்
|
339 |
+
▁கூட்ட
|
340 |
+
ுகிறது
|
341 |
+
▁ரா
|
342 |
+
▁மேற்
|
343 |
+
தில்
|
344 |
+
▁வைத்த
|
345 |
+
▁சீ
|
346 |
+
ப்படுத்த
|
347 |
+
போது
|
348 |
+
▁பேர
|
349 |
+
▁இணை
|
350 |
+
▁அல்லது
|
351 |
+
ற்
|
352 |
+
முறை
|
353 |
+
▁தெரிவித்த
|
354 |
+
▁யா
|
355 |
+
▁வழி
|
356 |
+
கொண்ட
|
357 |
+
▁சட்ட
|
358 |
+
தால்
|
359 |
+
ஸ
|
360 |
+
▁தோ
|
361 |
+
▁கொண்டு
|
362 |
+
ாகவும்
|
363 |
+
▁சென்ற
|
364 |
+
▁காரண
|
365 |
+
▁மீது
|
366 |
+
நிலை
|
367 |
+
▁காண
|
368 |
+
யில
|
369 |
+
▁பிரி
|
370 |
+
▁சூ
|
371 |
+
ட்டில்
|
372 |
+
▁ஆகிய
|
373 |
+
▁பற்றி
|
374 |
+
▁போன்ற
|
375 |
+
▁கட்சி
|
376 |
+
ினார்
|
377 |
+
▁நடவடிக்கை
|
378 |
+
▁இலங்கை
|
379 |
+
▁மருத்துவ
|
380 |
+
▁நடத்த
|
381 |
+
ாவில்
|
382 |
+
ாவின்
|
383 |
+
ஜி
|
384 |
+
▁இன்று
|
385 |
+
ப்போ
|
386 |
+
▁ஈ
|
387 |
+
▁நிறுவன
|
388 |
+
▁தமிழக
|
389 |
+
▁தனது
|
390 |
+
▁அரச
|
391 |
+
▁பயன்படுத்த
|
392 |
+
▁நகர
|
393 |
+
▁மக்கள
|
394 |
+
▁உலக
|
395 |
+
ிலிருந்து
|
396 |
+
▁டி
|
397 |
+
ிற்கு
|
398 |
+
▁மூலம்
|
399 |
+
▁திட்ட
|
400 |
+
▁இருப்ப
|
401 |
+
ுதல்
|
402 |
+
ஹ
|
403 |
+
வில்லை
|
404 |
+
▁முக்கிய
|
405 |
+
▁மத்திய
|
406 |
+
▁புதிய
|
407 |
+
▁இயக்க
|
408 |
+
ினர்
|
409 |
+
▁குறை
|
410 |
+
▁தாக்க
|
411 |
+
▁கொள்ள
|
412 |
+
ாலும்
|
413 |
+
▁முதல
|
414 |
+
ுடைய
|
415 |
+
▁மாற்ற
|
416 |
+
▁சென்னை
|
417 |
+
▁பெயர
|
418 |
+
▁குறிப்பிடத்தக்க
|
419 |
+
▁ஆனால்
|
420 |
+
▁தமிழ
|
421 |
+
▁மாநில
|
422 |
+
கொள்ள
|
423 |
+
ங்களுக்கு
|
424 |
+
▁பெற
|
425 |
+
▁அமெரிக்க
|
426 |
+
▁அமைந்துள்ள
|
427 |
+
ின்றன
|
428 |
+
ஃ
|
429 |
+
▁செல்ல
|
430 |
+
▁உயர்
|
431 |
+
▁சம்ப
|
432 |
+
▁செயல்
|
433 |
+
▁அளவ
|
434 |
+
ப்பட்டுள்ளது
|
435 |
+
▁உயிர
|
436 |
+
▁அமைப்ப
|
437 |
+
▁ராஜ
|
438 |
+
▁தேசிய
|
439 |
+
▁போராட்ட
|
440 |
+
▁தலைவர்
|
441 |
+
▁குழந்தை
|
442 |
+
▁தகவல்
|
443 |
+
▁தேர்தல
|
444 |
+
▁பள்ளி
|
445 |
+
ாவது
|
446 |
+
▁அருகே
|
447 |
+
ிருப்ப
|
448 |
+
▁அனைத்து
|
449 |
+
▁இரண்டு
|
450 |
+
▁விசாரணை
|
451 |
+
▁அரசியல்
|
452 |
+
▁சேர்ந்த
|
453 |
+
▁அதிகாரி
|
454 |
+
▁உருவாக்க
|
455 |
+
▁சிறப்ப
|
456 |
+
▁மாணவர்
|
457 |
+
▁மொழி
|
458 |
+
▁இவ்வ
|
459 |
+
ினால்
|
460 |
+
▁தொழில
|
461 |
+
▁கிராம
|
462 |
+
▁வெற்றி
|
463 |
+
▁செயல
|
464 |
+
▁சமூக
|
465 |
+
▁கல்வி
|
466 |
+
▁பெரிய
|
467 |
+
▁அடுத்த
|
468 |
+
▁இல்லை
|
469 |
+
▁தற்போது
|
470 |
+
▁குடும்ப
|
471 |
+
▁மனித
|
472 |
+
▁அவரது
|
473 |
+
▁ஆட்சி
|
474 |
+
▁ஆகும்
|
475 |
+
படுத்த
|
476 |
+
▁உதவி
|
477 |
+
▁வளர்
|
478 |
+
▁ஏற்பட்ட
|
479 |
+
▁நீதிமன்ற
|
480 |
+
எ
|
481 |
+
▁எடுக்க
|
482 |
+
▁உடல
|
483 |
+
▁அனுமதி
|
484 |
+
ௌ
|
485 |
+
அ
|
486 |
+
ஐ
|
487 |
+
ஏ
|
488 |
+
ஆ
|
489 |
+
இ
|
490 |
+
உ
|
491 |
+
ஒ
|
492 |
+
ஓ
|
493 |
+
ஈ
|
494 |
+
ங
|
495 |
+
ஊ
|
496 |
+
ஔ
|
497 |
+
௸
|
498 |
+
ஶ
|
499 |
+
ௐ
|
500 |
+
<sos/eos>
|
data/tamil_token_list/bpe_unigram500/train.txt
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e2d564e07cce2895bae4d45b74d0d6ca0bf10ec3c066ba6291379afc3eb4fbc9
|
3 |
+
size 25783902
|
exp/asr_stats_raw_tamil_bpe500/train/feats_stats.npz
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:06e390ecdf86b1004d4cdd7d8fffe10dcc415d4a18672391f95394722fb422ff
|
3 |
+
size 1402
|
exp/asr_train_asr_raw_tamil_bpe500/config.yaml
ADDED
@@ -0,0 +1,711 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
config: conf/train_asr.yaml
|
2 |
+
print_config: false
|
3 |
+
log_level: INFO
|
4 |
+
dry_run: false
|
5 |
+
iterator_type: sequence
|
6 |
+
output_dir: exp/asr_train_asr_raw_tamil_bpe500
|
7 |
+
ngpu: 1
|
8 |
+
seed: 2022
|
9 |
+
num_workers: 4
|
10 |
+
num_att_plot: 3
|
11 |
+
dist_backend: nccl
|
12 |
+
dist_init_method: env://
|
13 |
+
dist_world_size: null
|
14 |
+
dist_rank: null
|
15 |
+
local_rank: 0
|
16 |
+
dist_master_addr: null
|
17 |
+
dist_master_port: null
|
18 |
+
dist_launcher: null
|
19 |
+
multiprocessing_distributed: false
|
20 |
+
unused_parameters: false
|
21 |
+
sharded_ddp: false
|
22 |
+
cudnn_enabled: true
|
23 |
+
cudnn_benchmark: false
|
24 |
+
cudnn_deterministic: true
|
25 |
+
collect_stats: false
|
26 |
+
write_collected_feats: false
|
27 |
+
max_epoch: 70
|
28 |
+
patience: null
|
29 |
+
val_scheduler_criterion:
|
30 |
+
- valid
|
31 |
+
- loss
|
32 |
+
early_stopping_criterion:
|
33 |
+
- valid
|
34 |
+
- loss
|
35 |
+
- min
|
36 |
+
best_model_criterion:
|
37 |
+
- - valid
|
38 |
+
- acc
|
39 |
+
- max
|
40 |
+
keep_nbest_models: 10
|
41 |
+
nbest_averaging_interval: 0
|
42 |
+
grad_clip: 5.0
|
43 |
+
grad_clip_type: 2.0
|
44 |
+
grad_noise: false
|
45 |
+
accum_grad: 4
|
46 |
+
no_forward_run: false
|
47 |
+
resume: true
|
48 |
+
train_dtype: float32
|
49 |
+
use_amp: true
|
50 |
+
log_interval: null
|
51 |
+
use_matplotlib: true
|
52 |
+
use_tensorboard: true
|
53 |
+
create_graph_in_tensorboard: false
|
54 |
+
use_wandb: false
|
55 |
+
wandb_project: null
|
56 |
+
wandb_id: null
|
57 |
+
wandb_entity: null
|
58 |
+
wandb_name: null
|
59 |
+
wandb_model_log_interval: -1
|
60 |
+
detect_anomaly: false
|
61 |
+
pretrain_path: null
|
62 |
+
init_param: []
|
63 |
+
ignore_init_mismatch: false
|
64 |
+
freeze_param: []
|
65 |
+
num_iters_per_epoch: null
|
66 |
+
batch_size: 20
|
67 |
+
valid_batch_size: null
|
68 |
+
batch_bins: 10000000
|
69 |
+
valid_batch_bins: null
|
70 |
+
train_shape_file:
|
71 |
+
- exp/asr_stats_raw_tamil_bpe500/train/speech_shape
|
72 |
+
- exp/asr_stats_raw_tamil_bpe500/train/text_shape.bpe
|
73 |
+
valid_shape_file:
|
74 |
+
- exp/asr_stats_raw_tamil_bpe500/valid/speech_shape
|
75 |
+
- exp/asr_stats_raw_tamil_bpe500/valid/text_shape.bpe
|
76 |
+
batch_type: numel
|
77 |
+
valid_batch_type: null
|
78 |
+
fold_length:
|
79 |
+
- 80000
|
80 |
+
- 150
|
81 |
+
sort_in_batch: descending
|
82 |
+
shuffle_within_batch: false
|
83 |
+
sort_batch: descending
|
84 |
+
multiple_iterator: false
|
85 |
+
chunk_length: 500
|
86 |
+
chunk_shift_ratio: 0.5
|
87 |
+
num_cache_chunks: 1024
|
88 |
+
chunk_excluded_key_prefixes: []
|
89 |
+
train_data_path_and_name_and_type:
|
90 |
+
- - dump/raw/tamil/train/wav.scp
|
91 |
+
- speech
|
92 |
+
- sound
|
93 |
+
- - dump/raw/tamil/train/text
|
94 |
+
- text
|
95 |
+
- text
|
96 |
+
valid_data_path_and_name_and_type:
|
97 |
+
- - dump/raw/tamil/valid/wav.scp
|
98 |
+
- speech
|
99 |
+
- sound
|
100 |
+
- - dump/raw/tamil/valid/text
|
101 |
+
- text
|
102 |
+
- text
|
103 |
+
allow_variable_data_keys: false
|
104 |
+
max_cache_size: 0.0
|
105 |
+
max_cache_fd: 32
|
106 |
+
valid_max_cache_size: null
|
107 |
+
exclude_weight_decay: false
|
108 |
+
exclude_weight_decay_conf: {}
|
109 |
+
optim: adam
|
110 |
+
optim_conf:
|
111 |
+
lr: 0.002
|
112 |
+
weight_decay: 1.0e-06
|
113 |
+
scheduler: warmuplr
|
114 |
+
scheduler_conf:
|
115 |
+
warmup_steps: 15000
|
116 |
+
token_list:
|
117 |
+
- <blank>
|
118 |
+
- <unk>
|
119 |
+
- ம்
|
120 |
+
- ர
|
121 |
+
- ய
|
122 |
+
- ன்
|
123 |
+
- வ
|
124 |
+
- ▁அ
|
125 |
+
- ர்
|
126 |
+
- ு
|
127 |
+
- க
|
128 |
+
- ▁ப
|
129 |
+
- ன
|
130 |
+
- க்க
|
131 |
+
- ல
|
132 |
+
- த
|
133 |
+
- ா
|
134 |
+
- ▁க
|
135 |
+
- ப
|
136 |
+
- து
|
137 |
+
- ட
|
138 |
+
- ▁இ
|
139 |
+
- ம
|
140 |
+
- ல்
|
141 |
+
- ▁ம
|
142 |
+
- த்த
|
143 |
+
- ▁த
|
144 |
+
- ும்
|
145 |
+
- ை
|
146 |
+
- ோ
|
147 |
+
- க்
|
148 |
+
- தி
|
149 |
+
- ி
|
150 |
+
- ே
|
151 |
+
- ிய
|
152 |
+
- கள்
|
153 |
+
- ▁வி
|
154 |
+
- ந்த
|
155 |
+
- ரி
|
156 |
+
- டி
|
157 |
+
- ▁ந
|
158 |
+
- ப்ப
|
159 |
+
- டு
|
160 |
+
- ட்ட
|
161 |
+
- ▁வ
|
162 |
+
- ▁உ
|
163 |
+
- லை
|
164 |
+
- ▁ச
|
165 |
+
- ெ
|
166 |
+
- ச
|
167 |
+
- ▁ஆ
|
168 |
+
- ்
|
169 |
+
- வி
|
170 |
+
- ண
|
171 |
+
- ப்
|
172 |
+
- ள
|
173 |
+
- ீ
|
174 |
+
- மை
|
175 |
+
- த்து
|
176 |
+
- ரு
|
177 |
+
- று
|
178 |
+
- ில்
|
179 |
+
- க்கு
|
180 |
+
- ார்
|
181 |
+
- ▁எ
|
182 |
+
- யில்
|
183 |
+
- ூ
|
184 |
+
- ப்பு
|
185 |
+
- த்தில்
|
186 |
+
- ஸ்
|
187 |
+
- ட்
|
188 |
+
- வு
|
189 |
+
- ▁பா
|
190 |
+
- த்
|
191 |
+
- ங்க
|
192 |
+
- னை
|
193 |
+
- ற்ற
|
194 |
+
- ட்டு
|
195 |
+
- ாக
|
196 |
+
- ▁கு
|
197 |
+
- டை
|
198 |
+
- ▁நி
|
199 |
+
- யா
|
200 |
+
- ▁ஒரு
|
201 |
+
- லி
|
202 |
+
- ின்
|
203 |
+
- களை
|
204 |
+
- ச்ச
|
205 |
+
- ழ
|
206 |
+
- வா
|
207 |
+
- ள்
|
208 |
+
- கா
|
209 |
+
- ந்து
|
210 |
+
- ▁சி
|
211 |
+
- ான
|
212 |
+
- ▁நா
|
213 |
+
- ▁கா
|
214 |
+
- ▁மு
|
215 |
+
- ரை
|
216 |
+
- னி
|
217 |
+
- ற
|
218 |
+
- த்தி
|
219 |
+
- றி
|
220 |
+
- சி
|
221 |
+
- ▁போ
|
222 |
+
- ▁மற்றும்
|
223 |
+
- வை
|
224 |
+
- ரா
|
225 |
+
- ங்கள்
|
226 |
+
- ளி
|
227 |
+
- ▁இந்த
|
228 |
+
- ந
|
229 |
+
- ண்ட
|
230 |
+
- கு
|
231 |
+
- ால்
|
232 |
+
- ▁உள்ள
|
233 |
+
- யை
|
234 |
+
- ▁பு
|
235 |
+
- ▁வா
|
236 |
+
- கள
|
237 |
+
- ▁வர
|
238 |
+
- பா
|
239 |
+
- த்தை
|
240 |
+
- ▁மா
|
241 |
+
- ணி
|
242 |
+
- றை
|
243 |
+
- லா
|
244 |
+
- ▁
|
245 |
+
- வர்
|
246 |
+
- மா
|
247 |
+
- னர்
|
248 |
+
- மான
|
249 |
+
- மாக
|
250 |
+
- ொ
|
251 |
+
- கை
|
252 |
+
- ப்பட்ட
|
253 |
+
- ▁கோ
|
254 |
+
- ▁என
|
255 |
+
- ுள்ள
|
256 |
+
- ப்பி
|
257 |
+
- ▁வே
|
258 |
+
- ல்ல
|
259 |
+
- ட்டி
|
260 |
+
- ழு
|
261 |
+
- ▁சு
|
262 |
+
- ச்
|
263 |
+
- பி
|
264 |
+
- வர
|
265 |
+
- ண்
|
266 |
+
- ▁கி
|
267 |
+
- மி
|
268 |
+
- யாக
|
269 |
+
- தை
|
270 |
+
- கி
|
271 |
+
- ▁ர
|
272 |
+
- ழ்
|
273 |
+
- ▁ஏ
|
274 |
+
- ன்ற
|
275 |
+
- பு
|
276 |
+
- ரிய
|
277 |
+
- ற்க
|
278 |
+
- ுக்கு
|
279 |
+
- வே
|
280 |
+
- யின்
|
281 |
+
- களுக்கு
|
282 |
+
- ▁தி
|
283 |
+
- தா
|
284 |
+
- ▁கொ
|
285 |
+
- ாய்
|
286 |
+
- ▁செய்த
|
287 |
+
- ▁பிர
|
288 |
+
- ▁நீ
|
289 |
+
- ுடன்
|
290 |
+
- ▁என்று
|
291 |
+
- க்கும்
|
292 |
+
- ிற
|
293 |
+
- ▁சா
|
294 |
+
- ▁திரு
|
295 |
+
- ர்கள்
|
296 |
+
- ஜ
|
297 |
+
- ▁ஒ
|
298 |
+
- யி
|
299 |
+
- ளை
|
300 |
+
- ற்ப
|
301 |
+
- ▁தொ
|
302 |
+
- களில்
|
303 |
+
- ற்று
|
304 |
+
- களின்
|
305 |
+
- த்தின்
|
306 |
+
- ▁செய்ய
|
307 |
+
- ப்பா
|
308 |
+
- ப்பட
|
309 |
+
- ▁செ
|
310 |
+
- ையும்
|
311 |
+
- ▁வெளி
|
312 |
+
- ▁பி
|
313 |
+
- மு
|
314 |
+
- ▁ஜ
|
315 |
+
- ▁அவர்
|
316 |
+
- ▁து
|
317 |
+
- சா
|
318 |
+
- நா
|
319 |
+
- ▁வீ
|
320 |
+
- தாக
|
321 |
+
- ▁தே
|
322 |
+
- ச்சி
|
323 |
+
- ண்டு
|
324 |
+
- ையில்
|
325 |
+
- க்கி
|
326 |
+
- ங்கள
|
327 |
+
- ண்ண
|
328 |
+
- ▁ஊ
|
329 |
+
- ▁தொடர்
|
330 |
+
- க்கப்பட்ட
|
331 |
+
- ▁பொ
|
332 |
+
- ▁முடி
|
333 |
+
- ங்கு
|
334 |
+
- ▁தா
|
335 |
+
- ாளர்
|
336 |
+
- ▁என்ற
|
337 |
+
- ▁கட
|
338 |
+
- ங்கி
|
339 |
+
- ▁தெ
|
340 |
+
- சு
|
341 |
+
- ூர்
|
342 |
+
- ழி
|
343 |
+
- ▁அவ
|
344 |
+
- ▁கொண்ட
|
345 |
+
- ுகின்ற
|
346 |
+
- யான
|
347 |
+
- ▁தீ
|
348 |
+
- ▁பல
|
349 |
+
- ழை
|
350 |
+
- ▁பட
|
351 |
+
- ▁வெ
|
352 |
+
- ▁ட
|
353 |
+
- னா
|
354 |
+
- ▁இந்திய
|
355 |
+
- ஷ
|
356 |
+
- ▁கரு
|
357 |
+
- ▁இருக்க
|
358 |
+
- ஞ்ச
|
359 |
+
- ுவத
|
360 |
+
- ணை
|
361 |
+
- ▁இது
|
362 |
+
- ிருந்த
|
363 |
+
- வில்
|
364 |
+
- ங்களை
|
365 |
+
- ▁வேண்டும்
|
366 |
+
- ▁சே
|
367 |
+
- வும்
|
368 |
+
- ▁சொ
|
369 |
+
- ▁மூ
|
370 |
+
- ுள்ளது
|
371 |
+
- ▁நிலைய
|
372 |
+
- ▁கூற
|
373 |
+
- ▁இருந்த
|
374 |
+
- ▁பகுதி
|
375 |
+
- ிலும்
|
376 |
+
- யே
|
377 |
+
- ங்களில்
|
378 |
+
- ▁மாவட்ட
|
379 |
+
- ▁கே
|
380 |
+
- ▁ஓ
|
381 |
+
- ▁அதிக
|
382 |
+
- ▁மே
|
383 |
+
- ▁வழங்க
|
384 |
+
- ▁என்பது
|
385 |
+
- ருக்கு
|
386 |
+
- ராக
|
387 |
+
- ஷ்
|
388 |
+
- ▁பெற்ற
|
389 |
+
- ப்படும்
|
390 |
+
- ▁பணி
|
391 |
+
- ▁கால
|
392 |
+
- ▁முதல்
|
393 |
+
- ▁அரசு
|
394 |
+
- ள்ள
|
395 |
+
- ▁மீ
|
396 |
+
- த்திற்கு
|
397 |
+
- ▁வந்த
|
398 |
+
- ▁ஒன்ற
|
399 |
+
- ொரு
|
400 |
+
- ▁பெண்
|
401 |
+
- லாம்
|
402 |
+
- ▁ல
|
403 |
+
- ா்
|
404 |
+
- சை
|
405 |
+
- ▁என்ன
|
406 |
+
- ாட்சி
|
407 |
+
- ▁இட
|
408 |
+
- கிறார்
|
409 |
+
- ▁ஐ
|
410 |
+
- ெய்
|
411 |
+
- ந்தி
|
412 |
+
- ▁குறித்த
|
413 |
+
- ▁இருந்து
|
414 |
+
- ▁மிக
|
415 |
+
- தற்கு
|
416 |
+
- ▁ஹ
|
417 |
+
- ▁எதிர்
|
418 |
+
- றிய
|
419 |
+
- ▁சில
|
420 |
+
- ▁தமிழ்
|
421 |
+
- கிறது
|
422 |
+
- ▁ஸ்
|
423 |
+
- ண்டி
|
424 |
+
- ட்டை
|
425 |
+
- ▁வை
|
426 |
+
- ிடம்
|
427 |
+
- ▁எழு
|
428 |
+
- ப்படுகிறது
|
429 |
+
- ரோ
|
430 |
+
- ுள்ளார்
|
431 |
+
- ▁பெரு
|
432 |
+
- ▁முன்ன
|
433 |
+
- ிருக்க
|
434 |
+
- ஞ
|
435 |
+
- ▁நேர
|
436 |
+
- ▁பொது
|
437 |
+
- தே
|
438 |
+
- ▁அறிவ
|
439 |
+
- ▁என்பத
|
440 |
+
- ாமல்
|
441 |
+
- ▁இவர்
|
442 |
+
- ▁பிற
|
443 |
+
- ▁அந்த
|
444 |
+
- வ்
|
445 |
+
- ▁வழக்க
|
446 |
+
- ▁மேலும்
|
447 |
+
- களும்
|
448 |
+
- ந்திர
|
449 |
+
- தான்
|
450 |
+
- ▁மக்கள்
|
451 |
+
- ▁முறை
|
452 |
+
- ▁கூட
|
453 |
+
- ரின்
|
454 |
+
- ங்
|
455 |
+
- ▁கூட்ட
|
456 |
+
- ுகிறது
|
457 |
+
- ▁ரா
|
458 |
+
- ▁மேற்
|
459 |
+
- தில்
|
460 |
+
- ▁வைத்த
|
461 |
+
- ▁சீ
|
462 |
+
- ப்படுத்த
|
463 |
+
- போது
|
464 |
+
- ▁பேர
|
465 |
+
- ▁இணை
|
466 |
+
- ▁அல்லது
|
467 |
+
- ற்
|
468 |
+
- முறை
|
469 |
+
- ▁தெரிவித்த
|
470 |
+
- ▁யா
|
471 |
+
- ▁வழி
|
472 |
+
- கொண்ட
|
473 |
+
- ▁சட்ட
|
474 |
+
- தால்
|
475 |
+
- ஸ
|
476 |
+
- ▁தோ
|
477 |
+
- ▁கொண்டு
|
478 |
+
- ாகவும்
|
479 |
+
- ▁சென்ற
|
480 |
+
- ▁காரண
|
481 |
+
- ▁மீது
|
482 |
+
- நிலை
|
483 |
+
- ▁காண
|
484 |
+
- யில
|
485 |
+
- ▁பிரி
|
486 |
+
- ▁சூ
|
487 |
+
- ட்டில்
|
488 |
+
- ▁ஆகிய
|
489 |
+
- ▁பற்றி
|
490 |
+
- ▁போன்ற
|
491 |
+
- ▁கட்சி
|
492 |
+
- ினார்
|
493 |
+
- ▁நடவடிக்கை
|
494 |
+
- ▁இலங்கை
|
495 |
+
- ▁மருத்துவ
|
496 |
+
- ▁நடத்த
|
497 |
+
- ாவில்
|
498 |
+
- ாவின்
|
499 |
+
- ஜி
|
500 |
+
- ▁இன்று
|
501 |
+
- ப்போ
|
502 |
+
- ▁ஈ
|
503 |
+
- ▁நிறுவன
|
504 |
+
- ▁தமிழக
|
505 |
+
- ▁தனது
|
506 |
+
- ▁அரச
|
507 |
+
- ▁பயன்படுத்த
|
508 |
+
- ▁நகர
|
509 |
+
- ▁மக்கள
|
510 |
+
- ▁உலக
|
511 |
+
- ிலிருந்து
|
512 |
+
- ▁டி
|
513 |
+
- ிற்கு
|
514 |
+
- ▁மூலம்
|
515 |
+
- ▁திட்ட
|
516 |
+
- ▁இருப்ப
|
517 |
+
- ுதல்
|
518 |
+
- ஹ
|
519 |
+
- வில்லை
|
520 |
+
- ▁முக்கிய
|
521 |
+
- ▁மத்திய
|
522 |
+
- ▁புதிய
|
523 |
+
- ▁இயக்க
|
524 |
+
- ினர்
|
525 |
+
- ▁குறை
|
526 |
+
- ▁தாக்க
|
527 |
+
- ▁கொள்ள
|
528 |
+
- ாலும்
|
529 |
+
- ▁முதல
|
530 |
+
- ுடைய
|
531 |
+
- ▁மாற்ற
|
532 |
+
- ▁சென்னை
|
533 |
+
- ▁பெயர
|
534 |
+
- ▁குறிப்பிடத்தக்க
|
535 |
+
- ▁ஆனால்
|
536 |
+
- ▁தமிழ
|
537 |
+
- ▁மாநில
|
538 |
+
- கொள்ள
|
539 |
+
- ங்களுக்கு
|
540 |
+
- ▁பெற
|
541 |
+
- ▁அமெரிக்க
|
542 |
+
- ▁அமைந்துள்ள
|
543 |
+
- ின்றன
|
544 |
+
- ஃ
|
545 |
+
- ▁செல்ல
|
546 |
+
- ▁உயர்
|
547 |
+
- ▁சம்ப
|
548 |
+
- ▁செயல்
|
549 |
+
- ▁அளவ
|
550 |
+
- ப்பட்டுள்ளது
|
551 |
+
- ▁உயிர
|
552 |
+
- ▁அமைப்ப
|
553 |
+
- ▁ராஜ
|
554 |
+
- ▁தேசிய
|
555 |
+
- ▁போராட்ட
|
556 |
+
- ▁தலைவர்
|
557 |
+
- ▁குழந்தை
|
558 |
+
- ▁தகவல்
|
559 |
+
- ▁தேர்தல
|
560 |
+
- ▁பள்ளி
|
561 |
+
- ாவது
|
562 |
+
- ▁அருகே
|
563 |
+
- ிருப்ப
|
564 |
+
- ▁அனைத்து
|
565 |
+
- ▁இரண்டு
|
566 |
+
- ▁விசாரணை
|
567 |
+
- ▁அரசியல்
|
568 |
+
- ▁சேர்ந்த
|
569 |
+
- ▁அதிகாரி
|
570 |
+
- ▁உருவாக்க
|
571 |
+
- ▁சிறப்ப
|
572 |
+
- ▁மாணவர்
|
573 |
+
- ▁மொழி
|
574 |
+
- ▁இவ்வ
|
575 |
+
- ினால்
|
576 |
+
- ▁தொழில
|
577 |
+
- ▁கிராம
|
578 |
+
- ▁வெற்றி
|
579 |
+
- ▁செயல
|
580 |
+
- ▁சமூக
|
581 |
+
- ▁கல்வி
|
582 |
+
- ▁பெரிய
|
583 |
+
- ▁அடுத்த
|
584 |
+
- ▁இல்லை
|
585 |
+
- ▁தற்போது
|
586 |
+
- ▁குடும்ப
|
587 |
+
- ▁மனித
|
588 |
+
- ▁அவரது
|
589 |
+
- ▁ஆட்சி
|
590 |
+
- ▁ஆகும்
|
591 |
+
- படுத்த
|
592 |
+
- ▁உதவி
|
593 |
+
- ▁வளர்
|
594 |
+
- ▁ஏற்பட்ட
|
595 |
+
- ▁நீதிமன்ற
|
596 |
+
- எ
|
597 |
+
- ▁எடுக்க
|
598 |
+
- ▁உடல
|
599 |
+
- ▁அனுமதி
|
600 |
+
- ௌ
|
601 |
+
- அ
|
602 |
+
- ஐ
|
603 |
+
- ஏ
|
604 |
+
- ஆ
|
605 |
+
- இ
|
606 |
+
- உ
|
607 |
+
- ஒ
|
608 |
+
- ஓ
|
609 |
+
- ஈ
|
610 |
+
- ங
|
611 |
+
- ஊ
|
612 |
+
- ஔ
|
613 |
+
- ௸
|
614 |
+
- ஶ
|
615 |
+
- ௐ
|
616 |
+
- <sos/eos>
|
617 |
+
init: null
|
618 |
+
input_size: null
|
619 |
+
ctc_conf:
|
620 |
+
dropout_rate: 0.0
|
621 |
+
ctc_type: builtin
|
622 |
+
reduce: true
|
623 |
+
ignore_nan_grad: null
|
624 |
+
zero_infinity: true
|
625 |
+
joint_net_conf: null
|
626 |
+
use_preprocessor: true
|
627 |
+
token_type: bpe
|
628 |
+
bpemodel: data/tamil_token_list/bpe_unigram500/bpe.model
|
629 |
+
non_linguistic_symbols: null
|
630 |
+
cleaner: null
|
631 |
+
g2p: null
|
632 |
+
speech_volume_normalize: null
|
633 |
+
rir_scp: null
|
634 |
+
rir_apply_prob: 1.0
|
635 |
+
noise_scp: null
|
636 |
+
noise_apply_prob: 1.0
|
637 |
+
noise_db_range: '13_15'
|
638 |
+
short_noise_thres: 0.5
|
639 |
+
aux_ctc_tasks: []
|
640 |
+
frontend: default
|
641 |
+
frontend_conf:
|
642 |
+
n_fft: 512
|
643 |
+
win_length: 400
|
644 |
+
hop_length: 160
|
645 |
+
fs: 16k
|
646 |
+
specaug: specaug
|
647 |
+
specaug_conf:
|
648 |
+
apply_time_warp: true
|
649 |
+
time_warp_window: 5
|
650 |
+
time_warp_mode: bicubic
|
651 |
+
apply_freq_mask: true
|
652 |
+
freq_mask_width_range:
|
653 |
+
- 0
|
654 |
+
- 27
|
655 |
+
num_freq_mask: 2
|
656 |
+
apply_time_mask: true
|
657 |
+
time_mask_width_ratio_range:
|
658 |
+
- 0.0
|
659 |
+
- 0.05
|
660 |
+
num_time_mask: 5
|
661 |
+
normalize: global_mvn
|
662 |
+
normalize_conf:
|
663 |
+
stats_file: exp/asr_stats_raw_tamil_bpe500/train/feats_stats.npz
|
664 |
+
model: espnet
|
665 |
+
model_conf:
|
666 |
+
ctc_weight: 0.3
|
667 |
+
lsm_weight: 0.1
|
668 |
+
length_normalized_loss: false
|
669 |
+
preencoder: null
|
670 |
+
preencoder_conf: {}
|
671 |
+
encoder: e_branchformer
|
672 |
+
encoder_conf:
|
673 |
+
output_size: 256
|
674 |
+
attention_heads: 4
|
675 |
+
attention_layer_type: rel_selfattn
|
676 |
+
pos_enc_layer_type: rel_pos
|
677 |
+
rel_pos_type: latest
|
678 |
+
cgmlp_linear_units: 1024
|
679 |
+
cgmlp_conv_kernel: 31
|
680 |
+
use_linear_after_conv: false
|
681 |
+
gate_activation: identity
|
682 |
+
num_blocks: 12
|
683 |
+
dropout_rate: 0.1
|
684 |
+
positional_dropout_rate: 0.1
|
685 |
+
attention_dropout_rate: 0.1
|
686 |
+
input_layer: conv2d
|
687 |
+
layer_drop_rate: 0.0
|
688 |
+
linear_units: 1024
|
689 |
+
positionwise_layer_type: linear
|
690 |
+
use_ffn: true
|
691 |
+
macaron_ffn: true
|
692 |
+
merge_conv_kernel: 31
|
693 |
+
postencoder: null
|
694 |
+
postencoder_conf: {}
|
695 |
+
decoder: transformer
|
696 |
+
decoder_conf:
|
697 |
+
attention_heads: 4
|
698 |
+
linear_units: 2048
|
699 |
+
num_blocks: 6
|
700 |
+
dropout_rate: 0.1
|
701 |
+
positional_dropout_rate: 0.1
|
702 |
+
self_attention_dropout_rate: 0.1
|
703 |
+
src_attention_dropout_rate: 0.1
|
704 |
+
layer_drop_rate: 0.0
|
705 |
+
preprocessor: default
|
706 |
+
preprocessor_conf: {}
|
707 |
+
required:
|
708 |
+
- output_dir
|
709 |
+
- token_list
|
710 |
+
version: '202304'
|
711 |
+
distributed: false
|
exp/asr_train_asr_raw_tamil_bpe500/valid.acc.ave_10best.pth
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:cdd89ae5d403ca42db9ab562057b3be92b55dc13481545809fc203c114471437
|
3 |
+
size 140354317
|