Ssid7647 committed on
Commit
723dcfa
1 Parent(s): beb870e

Upload 7 files

Browse files
config.json ADDED
@@ -0,0 +1,35 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "./mt5_transliteration_upgraded/checkpoint-96802",
3
+ "architectures": [
4
+ "MT5ForConditionalGeneration"
5
+ ],
6
+ "bos_token_id": 811,
7
+ "classifier_dropout": 0.0,
8
+ "d_ff": 1024,
9
+ "d_kv": 64,
10
+ "d_model": 512,
11
+ "decoder_start_token_id": 813,
12
+ "dense_act_fn": "gelu_new",
13
+ "dropout_rate": 0.1,
14
+ "early_stopping": true,
15
+ "eos_token_id": 812,
16
+ "feed_forward_proj": "gated-gelu",
17
+ "initializer_factor": 1.0,
18
+ "is_encoder_decoder": true,
19
+ "is_gated_act": true,
20
+ "layer_norm_epsilon": 1e-06,
21
+ "max_length": 64,
22
+ "model_type": "mt5",
23
+ "num_decoder_layers": 4,
24
+ "num_heads": 4,
25
+ "num_layers": 4,
26
+ "pad_token_id": 813,
27
+ "relative_attention_max_distance": 128,
28
+ "relative_attention_num_buckets": 32,
29
+ "tie_word_embeddings": false,
30
+ "tokenizer_class": "T5Tokenizer",
31
+ "torch_dtype": "float32",
32
+ "transformers_version": "4.30.2",
33
+ "use_cache": true,
34
+ "vocab_size": 826
35
+ }
generation_config.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token_id": 811,
3
+ "decoder_start_token_id": 813,
4
+ "early_stopping": true,
5
+ "eos_token_id": 812,
6
+ "max_length": 64,
7
+ "pad_token_id": 813,
8
+ "transformers_version": "4.30.2"
9
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eea47c12a0b8ec05665d7b4d1882605d7cc842199e8dbdf8a94e610957015c41
3
+ size 78959231
special_tokens_map.json ADDED
@@ -0,0 +1,138 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<arb>",
4
+ "<asm>",
5
+ "<ben>",
6
+ "<guj>",
7
+ "<hin>",
8
+ "<kan>",
9
+ "<mal>",
10
+ "<mar>",
11
+ "<ori>",
12
+ "<pan>",
13
+ "<tam>",
14
+ "<tel>",
15
+ "<extra_id_0>",
16
+ "<extra_id_1>",
17
+ "<extra_id_2>",
18
+ "<extra_id_3>",
19
+ "<extra_id_4>",
20
+ "<extra_id_5>",
21
+ "<extra_id_6>",
22
+ "<extra_id_7>",
23
+ "<extra_id_8>",
24
+ "<extra_id_9>",
25
+ "<extra_id_10>",
26
+ "<extra_id_11>",
27
+ "<extra_id_12>",
28
+ "<extra_id_13>",
29
+ "<extra_id_14>",
30
+ "<extra_id_15>",
31
+ "<extra_id_16>",
32
+ "<extra_id_17>",
33
+ "<extra_id_18>",
34
+ "<extra_id_19>",
35
+ "<extra_id_20>",
36
+ "<extra_id_21>",
37
+ "<extra_id_22>",
38
+ "<extra_id_23>",
39
+ "<extra_id_24>",
40
+ "<extra_id_25>",
41
+ "<extra_id_26>",
42
+ "<extra_id_27>",
43
+ "<extra_id_28>",
44
+ "<extra_id_29>",
45
+ "<extra_id_30>",
46
+ "<extra_id_31>",
47
+ "<extra_id_32>",
48
+ "<extra_id_33>",
49
+ "<extra_id_34>",
50
+ "<extra_id_35>",
51
+ "<extra_id_36>",
52
+ "<extra_id_37>",
53
+ "<extra_id_38>",
54
+ "<extra_id_39>",
55
+ "<extra_id_40>",
56
+ "<extra_id_41>",
57
+ "<extra_id_42>",
58
+ "<extra_id_43>",
59
+ "<extra_id_44>",
60
+ "<extra_id_45>",
61
+ "<extra_id_46>",
62
+ "<extra_id_47>",
63
+ "<extra_id_48>",
64
+ "<extra_id_49>",
65
+ "<extra_id_50>",
66
+ "<extra_id_51>",
67
+ "<extra_id_52>",
68
+ "<extra_id_53>",
69
+ "<extra_id_54>",
70
+ "<extra_id_55>",
71
+ "<extra_id_56>",
72
+ "<extra_id_57>",
73
+ "<extra_id_58>",
74
+ "<extra_id_59>",
75
+ "<extra_id_60>",
76
+ "<extra_id_61>",
77
+ "<extra_id_62>",
78
+ "<extra_id_63>",
79
+ "<extra_id_64>",
80
+ "<extra_id_65>",
81
+ "<extra_id_66>",
82
+ "<extra_id_67>",
83
+ "<extra_id_68>",
84
+ "<extra_id_69>",
85
+ "<extra_id_70>",
86
+ "<extra_id_71>",
87
+ "<extra_id_72>",
88
+ "<extra_id_73>",
89
+ "<extra_id_74>",
90
+ "<extra_id_75>",
91
+ "<extra_id_76>",
92
+ "<extra_id_77>",
93
+ "<extra_id_78>",
94
+ "<extra_id_79>",
95
+ "<extra_id_80>",
96
+ "<extra_id_81>",
97
+ "<extra_id_82>",
98
+ "<extra_id_83>",
99
+ "<extra_id_84>",
100
+ "<extra_id_85>",
101
+ "<extra_id_86>",
102
+ "<extra_id_87>",
103
+ "<extra_id_88>",
104
+ "<extra_id_89>",
105
+ "<extra_id_90>",
106
+ "<extra_id_91>",
107
+ "<extra_id_92>",
108
+ "<extra_id_93>",
109
+ "<extra_id_94>",
110
+ "<extra_id_95>",
111
+ "<extra_id_96>",
112
+ "<extra_id_97>",
113
+ "<extra_id_98>",
114
+ "<extra_id_99>"
115
+ ],
116
+ "bos_token": {
117
+ "content": "<s>",
118
+ "lstrip": false,
119
+ "normalized": false,
120
+ "rstrip": false,
121
+ "single_word": false
122
+ },
123
+ "eos_token": {
124
+ "content": "</s>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false
129
+ },
130
+ "pad_token": {
131
+ "content": "<pad>",
132
+ "lstrip": false,
133
+ "normalized": false,
134
+ "rstrip": false,
135
+ "single_word": false
136
+ },
137
+ "unk_token": "<unk>"
138
+ }
tokenizer.json ADDED
@@ -0,0 +1,1898 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "version": "1.0",
3
+ "truncation": null,
4
+ "padding": {
5
+ "strategy": "BatchLongest",
6
+ "direction": "Right",
7
+ "pad_to_multiple_of": null,
8
+ "pad_id": 813,
9
+ "pad_type_id": 0,
10
+ "pad_token": "<pad>"
11
+ },
12
+ "added_tokens": [
13
+ {
14
+ "id": 0,
15
+ "content": "<unk>",
16
+ "single_word": false,
17
+ "lstrip": false,
18
+ "rstrip": false,
19
+ "normalized": false,
20
+ "special": true
21
+ },
22
+ {
23
+ "id": 811,
24
+ "content": "<s>",
25
+ "single_word": false,
26
+ "lstrip": false,
27
+ "rstrip": false,
28
+ "normalized": false,
29
+ "special": true
30
+ },
31
+ {
32
+ "id": 812,
33
+ "content": "</s>",
34
+ "single_word": false,
35
+ "lstrip": false,
36
+ "rstrip": false,
37
+ "normalized": false,
38
+ "special": true
39
+ },
40
+ {
41
+ "id": 813,
42
+ "content": "<pad>",
43
+ "single_word": false,
44
+ "lstrip": false,
45
+ "rstrip": false,
46
+ "normalized": false,
47
+ "special": true
48
+ },
49
+ {
50
+ "id": 814,
51
+ "content": "<arb>",
52
+ "single_word": false,
53
+ "lstrip": false,
54
+ "rstrip": false,
55
+ "normalized": false,
56
+ "special": true
57
+ },
58
+ {
59
+ "id": 815,
60
+ "content": "<asm>",
61
+ "single_word": false,
62
+ "lstrip": false,
63
+ "rstrip": false,
64
+ "normalized": false,
65
+ "special": true
66
+ },
67
+ {
68
+ "id": 816,
69
+ "content": "<ben>",
70
+ "single_word": false,
71
+ "lstrip": false,
72
+ "rstrip": false,
73
+ "normalized": false,
74
+ "special": true
75
+ },
76
+ {
77
+ "id": 817,
78
+ "content": "<guj>",
79
+ "single_word": false,
80
+ "lstrip": false,
81
+ "rstrip": false,
82
+ "normalized": false,
83
+ "special": true
84
+ },
85
+ {
86
+ "id": 818,
87
+ "content": "<hin>",
88
+ "single_word": false,
89
+ "lstrip": false,
90
+ "rstrip": false,
91
+ "normalized": false,
92
+ "special": true
93
+ },
94
+ {
95
+ "id": 819,
96
+ "content": "<kan>",
97
+ "single_word": false,
98
+ "lstrip": false,
99
+ "rstrip": false,
100
+ "normalized": false,
101
+ "special": true
102
+ },
103
+ {
104
+ "id": 820,
105
+ "content": "<mal>",
106
+ "single_word": false,
107
+ "lstrip": false,
108
+ "rstrip": false,
109
+ "normalized": false,
110
+ "special": true
111
+ },
112
+ {
113
+ "id": 821,
114
+ "content": "<mar>",
115
+ "single_word": false,
116
+ "lstrip": false,
117
+ "rstrip": false,
118
+ "normalized": false,
119
+ "special": true
120
+ },
121
+ {
122
+ "id": 822,
123
+ "content": "<ori>",
124
+ "single_word": false,
125
+ "lstrip": false,
126
+ "rstrip": false,
127
+ "normalized": false,
128
+ "special": true
129
+ },
130
+ {
131
+ "id": 823,
132
+ "content": "<pan>",
133
+ "single_word": false,
134
+ "lstrip": false,
135
+ "rstrip": false,
136
+ "normalized": false,
137
+ "special": true
138
+ },
139
+ {
140
+ "id": 824,
141
+ "content": "<tam>",
142
+ "single_word": false,
143
+ "lstrip": false,
144
+ "rstrip": false,
145
+ "normalized": false,
146
+ "special": true
147
+ },
148
+ {
149
+ "id": 825,
150
+ "content": "<tel>",
151
+ "single_word": false,
152
+ "lstrip": false,
153
+ "rstrip": false,
154
+ "normalized": false,
155
+ "special": true
156
+ },
157
+ {
158
+ "id": 826,
159
+ "content": "<extra_id_0>",
160
+ "single_word": false,
161
+ "lstrip": false,
162
+ "rstrip": false,
163
+ "normalized": false,
164
+ "special": true
165
+ },
166
+ {
167
+ "id": 827,
168
+ "content": "<extra_id_1>",
169
+ "single_word": false,
170
+ "lstrip": false,
171
+ "rstrip": false,
172
+ "normalized": false,
173
+ "special": true
174
+ },
175
+ {
176
+ "id": 828,
177
+ "content": "<extra_id_2>",
178
+ "single_word": false,
179
+ "lstrip": false,
180
+ "rstrip": false,
181
+ "normalized": false,
182
+ "special": true
183
+ },
184
+ {
185
+ "id": 829,
186
+ "content": "<extra_id_3>",
187
+ "single_word": false,
188
+ "lstrip": false,
189
+ "rstrip": false,
190
+ "normalized": false,
191
+ "special": true
192
+ },
193
+ {
194
+ "id": 830,
195
+ "content": "<extra_id_4>",
196
+ "single_word": false,
197
+ "lstrip": false,
198
+ "rstrip": false,
199
+ "normalized": false,
200
+ "special": true
201
+ },
202
+ {
203
+ "id": 831,
204
+ "content": "<extra_id_5>",
205
+ "single_word": false,
206
+ "lstrip": false,
207
+ "rstrip": false,
208
+ "normalized": false,
209
+ "special": true
210
+ },
211
+ {
212
+ "id": 832,
213
+ "content": "<extra_id_6>",
214
+ "single_word": false,
215
+ "lstrip": false,
216
+ "rstrip": false,
217
+ "normalized": false,
218
+ "special": true
219
+ },
220
+ {
221
+ "id": 833,
222
+ "content": "<extra_id_7>",
223
+ "single_word": false,
224
+ "lstrip": false,
225
+ "rstrip": false,
226
+ "normalized": false,
227
+ "special": true
228
+ },
229
+ {
230
+ "id": 834,
231
+ "content": "<extra_id_8>",
232
+ "single_word": false,
233
+ "lstrip": false,
234
+ "rstrip": false,
235
+ "normalized": false,
236
+ "special": true
237
+ },
238
+ {
239
+ "id": 835,
240
+ "content": "<extra_id_9>",
241
+ "single_word": false,
242
+ "lstrip": false,
243
+ "rstrip": false,
244
+ "normalized": false,
245
+ "special": true
246
+ },
247
+ {
248
+ "id": 836,
249
+ "content": "<extra_id_10>",
250
+ "single_word": false,
251
+ "lstrip": false,
252
+ "rstrip": false,
253
+ "normalized": false,
254
+ "special": true
255
+ },
256
+ {
257
+ "id": 837,
258
+ "content": "<extra_id_11>",
259
+ "single_word": false,
260
+ "lstrip": false,
261
+ "rstrip": false,
262
+ "normalized": false,
263
+ "special": true
264
+ },
265
+ {
266
+ "id": 838,
267
+ "content": "<extra_id_12>",
268
+ "single_word": false,
269
+ "lstrip": false,
270
+ "rstrip": false,
271
+ "normalized": false,
272
+ "special": true
273
+ },
274
+ {
275
+ "id": 839,
276
+ "content": "<extra_id_13>",
277
+ "single_word": false,
278
+ "lstrip": false,
279
+ "rstrip": false,
280
+ "normalized": false,
281
+ "special": true
282
+ },
283
+ {
284
+ "id": 840,
285
+ "content": "<extra_id_14>",
286
+ "single_word": false,
287
+ "lstrip": false,
288
+ "rstrip": false,
289
+ "normalized": false,
290
+ "special": true
291
+ },
292
+ {
293
+ "id": 841,
294
+ "content": "<extra_id_15>",
295
+ "single_word": false,
296
+ "lstrip": false,
297
+ "rstrip": false,
298
+ "normalized": false,
299
+ "special": true
300
+ },
301
+ {
302
+ "id": 842,
303
+ "content": "<extra_id_16>",
304
+ "single_word": false,
305
+ "lstrip": false,
306
+ "rstrip": false,
307
+ "normalized": false,
308
+ "special": true
309
+ },
310
+ {
311
+ "id": 843,
312
+ "content": "<extra_id_17>",
313
+ "single_word": false,
314
+ "lstrip": false,
315
+ "rstrip": false,
316
+ "normalized": false,
317
+ "special": true
318
+ },
319
+ {
320
+ "id": 844,
321
+ "content": "<extra_id_18>",
322
+ "single_word": false,
323
+ "lstrip": false,
324
+ "rstrip": false,
325
+ "normalized": false,
326
+ "special": true
327
+ },
328
+ {
329
+ "id": 845,
330
+ "content": "<extra_id_19>",
331
+ "single_word": false,
332
+ "lstrip": false,
333
+ "rstrip": false,
334
+ "normalized": false,
335
+ "special": true
336
+ },
337
+ {
338
+ "id": 846,
339
+ "content": "<extra_id_20>",
340
+ "single_word": false,
341
+ "lstrip": false,
342
+ "rstrip": false,
343
+ "normalized": false,
344
+ "special": true
345
+ },
346
+ {
347
+ "id": 847,
348
+ "content": "<extra_id_21>",
349
+ "single_word": false,
350
+ "lstrip": false,
351
+ "rstrip": false,
352
+ "normalized": false,
353
+ "special": true
354
+ },
355
+ {
356
+ "id": 848,
357
+ "content": "<extra_id_22>",
358
+ "single_word": false,
359
+ "lstrip": false,
360
+ "rstrip": false,
361
+ "normalized": false,
362
+ "special": true
363
+ },
364
+ {
365
+ "id": 849,
366
+ "content": "<extra_id_23>",
367
+ "single_word": false,
368
+ "lstrip": false,
369
+ "rstrip": false,
370
+ "normalized": false,
371
+ "special": true
372
+ },
373
+ {
374
+ "id": 850,
375
+ "content": "<extra_id_24>",
376
+ "single_word": false,
377
+ "lstrip": false,
378
+ "rstrip": false,
379
+ "normalized": false,
380
+ "special": true
381
+ },
382
+ {
383
+ "id": 851,
384
+ "content": "<extra_id_25>",
385
+ "single_word": false,
386
+ "lstrip": false,
387
+ "rstrip": false,
388
+ "normalized": false,
389
+ "special": true
390
+ },
391
+ {
392
+ "id": 852,
393
+ "content": "<extra_id_26>",
394
+ "single_word": false,
395
+ "lstrip": false,
396
+ "rstrip": false,
397
+ "normalized": false,
398
+ "special": true
399
+ },
400
+ {
401
+ "id": 853,
402
+ "content": "<extra_id_27>",
403
+ "single_word": false,
404
+ "lstrip": false,
405
+ "rstrip": false,
406
+ "normalized": false,
407
+ "special": true
408
+ },
409
+ {
410
+ "id": 854,
411
+ "content": "<extra_id_28>",
412
+ "single_word": false,
413
+ "lstrip": false,
414
+ "rstrip": false,
415
+ "normalized": false,
416
+ "special": true
417
+ },
418
+ {
419
+ "id": 855,
420
+ "content": "<extra_id_29>",
421
+ "single_word": false,
422
+ "lstrip": false,
423
+ "rstrip": false,
424
+ "normalized": false,
425
+ "special": true
426
+ },
427
+ {
428
+ "id": 856,
429
+ "content": "<extra_id_30>",
430
+ "single_word": false,
431
+ "lstrip": false,
432
+ "rstrip": false,
433
+ "normalized": false,
434
+ "special": true
435
+ },
436
+ {
437
+ "id": 857,
438
+ "content": "<extra_id_31>",
439
+ "single_word": false,
440
+ "lstrip": false,
441
+ "rstrip": false,
442
+ "normalized": false,
443
+ "special": true
444
+ },
445
+ {
446
+ "id": 858,
447
+ "content": "<extra_id_32>",
448
+ "single_word": false,
449
+ "lstrip": false,
450
+ "rstrip": false,
451
+ "normalized": false,
452
+ "special": true
453
+ },
454
+ {
455
+ "id": 859,
456
+ "content": "<extra_id_33>",
457
+ "single_word": false,
458
+ "lstrip": false,
459
+ "rstrip": false,
460
+ "normalized": false,
461
+ "special": true
462
+ },
463
+ {
464
+ "id": 860,
465
+ "content": "<extra_id_34>",
466
+ "single_word": false,
467
+ "lstrip": false,
468
+ "rstrip": false,
469
+ "normalized": false,
470
+ "special": true
471
+ },
472
+ {
473
+ "id": 861,
474
+ "content": "<extra_id_35>",
475
+ "single_word": false,
476
+ "lstrip": false,
477
+ "rstrip": false,
478
+ "normalized": false,
479
+ "special": true
480
+ },
481
+ {
482
+ "id": 862,
483
+ "content": "<extra_id_36>",
484
+ "single_word": false,
485
+ "lstrip": false,
486
+ "rstrip": false,
487
+ "normalized": false,
488
+ "special": true
489
+ },
490
+ {
491
+ "id": 863,
492
+ "content": "<extra_id_37>",
493
+ "single_word": false,
494
+ "lstrip": false,
495
+ "rstrip": false,
496
+ "normalized": false,
497
+ "special": true
498
+ },
499
+ {
500
+ "id": 864,
501
+ "content": "<extra_id_38>",
502
+ "single_word": false,
503
+ "lstrip": false,
504
+ "rstrip": false,
505
+ "normalized": false,
506
+ "special": true
507
+ },
508
+ {
509
+ "id": 865,
510
+ "content": "<extra_id_39>",
511
+ "single_word": false,
512
+ "lstrip": false,
513
+ "rstrip": false,
514
+ "normalized": false,
515
+ "special": true
516
+ },
517
+ {
518
+ "id": 866,
519
+ "content": "<extra_id_40>",
520
+ "single_word": false,
521
+ "lstrip": false,
522
+ "rstrip": false,
523
+ "normalized": false,
524
+ "special": true
525
+ },
526
+ {
527
+ "id": 867,
528
+ "content": "<extra_id_41>",
529
+ "single_word": false,
530
+ "lstrip": false,
531
+ "rstrip": false,
532
+ "normalized": false,
533
+ "special": true
534
+ },
535
+ {
536
+ "id": 868,
537
+ "content": "<extra_id_42>",
538
+ "single_word": false,
539
+ "lstrip": false,
540
+ "rstrip": false,
541
+ "normalized": false,
542
+ "special": true
543
+ },
544
+ {
545
+ "id": 869,
546
+ "content": "<extra_id_43>",
547
+ "single_word": false,
548
+ "lstrip": false,
549
+ "rstrip": false,
550
+ "normalized": false,
551
+ "special": true
552
+ },
553
+ {
554
+ "id": 870,
555
+ "content": "<extra_id_44>",
556
+ "single_word": false,
557
+ "lstrip": false,
558
+ "rstrip": false,
559
+ "normalized": false,
560
+ "special": true
561
+ },
562
+ {
563
+ "id": 871,
564
+ "content": "<extra_id_45>",
565
+ "single_word": false,
566
+ "lstrip": false,
567
+ "rstrip": false,
568
+ "normalized": false,
569
+ "special": true
570
+ },
571
+ {
572
+ "id": 872,
573
+ "content": "<extra_id_46>",
574
+ "single_word": false,
575
+ "lstrip": false,
576
+ "rstrip": false,
577
+ "normalized": false,
578
+ "special": true
579
+ },
580
+ {
581
+ "id": 873,
582
+ "content": "<extra_id_47>",
583
+ "single_word": false,
584
+ "lstrip": false,
585
+ "rstrip": false,
586
+ "normalized": false,
587
+ "special": true
588
+ },
589
+ {
590
+ "id": 874,
591
+ "content": "<extra_id_48>",
592
+ "single_word": false,
593
+ "lstrip": false,
594
+ "rstrip": false,
595
+ "normalized": false,
596
+ "special": true
597
+ },
598
+ {
599
+ "id": 875,
600
+ "content": "<extra_id_49>",
601
+ "single_word": false,
602
+ "lstrip": false,
603
+ "rstrip": false,
604
+ "normalized": false,
605
+ "special": true
606
+ },
607
+ {
608
+ "id": 876,
609
+ "content": "<extra_id_50>",
610
+ "single_word": false,
611
+ "lstrip": false,
612
+ "rstrip": false,
613
+ "normalized": false,
614
+ "special": true
615
+ },
616
+ {
617
+ "id": 877,
618
+ "content": "<extra_id_51>",
619
+ "single_word": false,
620
+ "lstrip": false,
621
+ "rstrip": false,
622
+ "normalized": false,
623
+ "special": true
624
+ },
625
+ {
626
+ "id": 878,
627
+ "content": "<extra_id_52>",
628
+ "single_word": false,
629
+ "lstrip": false,
630
+ "rstrip": false,
631
+ "normalized": false,
632
+ "special": true
633
+ },
634
+ {
635
+ "id": 879,
636
+ "content": "<extra_id_53>",
637
+ "single_word": false,
638
+ "lstrip": false,
639
+ "rstrip": false,
640
+ "normalized": false,
641
+ "special": true
642
+ },
643
+ {
644
+ "id": 880,
645
+ "content": "<extra_id_54>",
646
+ "single_word": false,
647
+ "lstrip": false,
648
+ "rstrip": false,
649
+ "normalized": false,
650
+ "special": true
651
+ },
652
+ {
653
+ "id": 881,
654
+ "content": "<extra_id_55>",
655
+ "single_word": false,
656
+ "lstrip": false,
657
+ "rstrip": false,
658
+ "normalized": false,
659
+ "special": true
660
+ },
661
+ {
662
+ "id": 882,
663
+ "content": "<extra_id_56>",
664
+ "single_word": false,
665
+ "lstrip": false,
666
+ "rstrip": false,
667
+ "normalized": false,
668
+ "special": true
669
+ },
670
+ {
671
+ "id": 883,
672
+ "content": "<extra_id_57>",
673
+ "single_word": false,
674
+ "lstrip": false,
675
+ "rstrip": false,
676
+ "normalized": false,
677
+ "special": true
678
+ },
679
+ {
680
+ "id": 884,
681
+ "content": "<extra_id_58>",
682
+ "single_word": false,
683
+ "lstrip": false,
684
+ "rstrip": false,
685
+ "normalized": false,
686
+ "special": true
687
+ },
688
+ {
689
+ "id": 885,
690
+ "content": "<extra_id_59>",
691
+ "single_word": false,
692
+ "lstrip": false,
693
+ "rstrip": false,
694
+ "normalized": false,
695
+ "special": true
696
+ },
697
+ {
698
+ "id": 886,
699
+ "content": "<extra_id_60>",
700
+ "single_word": false,
701
+ "lstrip": false,
702
+ "rstrip": false,
703
+ "normalized": false,
704
+ "special": true
705
+ },
706
+ {
707
+ "id": 887,
708
+ "content": "<extra_id_61>",
709
+ "single_word": false,
710
+ "lstrip": false,
711
+ "rstrip": false,
712
+ "normalized": false,
713
+ "special": true
714
+ },
715
+ {
716
+ "id": 888,
717
+ "content": "<extra_id_62>",
718
+ "single_word": false,
719
+ "lstrip": false,
720
+ "rstrip": false,
721
+ "normalized": false,
722
+ "special": true
723
+ },
724
+ {
725
+ "id": 889,
726
+ "content": "<extra_id_63>",
727
+ "single_word": false,
728
+ "lstrip": false,
729
+ "rstrip": false,
730
+ "normalized": false,
731
+ "special": true
732
+ },
733
+ {
734
+ "id": 890,
735
+ "content": "<extra_id_64>",
736
+ "single_word": false,
737
+ "lstrip": false,
738
+ "rstrip": false,
739
+ "normalized": false,
740
+ "special": true
741
+ },
742
+ {
743
+ "id": 891,
744
+ "content": "<extra_id_65>",
745
+ "single_word": false,
746
+ "lstrip": false,
747
+ "rstrip": false,
748
+ "normalized": false,
749
+ "special": true
750
+ },
751
+ {
752
+ "id": 892,
753
+ "content": "<extra_id_66>",
754
+ "single_word": false,
755
+ "lstrip": false,
756
+ "rstrip": false,
757
+ "normalized": false,
758
+ "special": true
759
+ },
760
+ {
761
+ "id": 893,
762
+ "content": "<extra_id_67>",
763
+ "single_word": false,
764
+ "lstrip": false,
765
+ "rstrip": false,
766
+ "normalized": false,
767
+ "special": true
768
+ },
769
+ {
770
+ "id": 894,
771
+ "content": "<extra_id_68>",
772
+ "single_word": false,
773
+ "lstrip": false,
774
+ "rstrip": false,
775
+ "normalized": false,
776
+ "special": true
777
+ },
778
+ {
779
+ "id": 895,
780
+ "content": "<extra_id_69>",
781
+ "single_word": false,
782
+ "lstrip": false,
783
+ "rstrip": false,
784
+ "normalized": false,
785
+ "special": true
786
+ },
787
+ {
788
+ "id": 896,
789
+ "content": "<extra_id_70>",
790
+ "single_word": false,
791
+ "lstrip": false,
792
+ "rstrip": false,
793
+ "normalized": false,
794
+ "special": true
795
+ },
796
+ {
797
+ "id": 897,
798
+ "content": "<extra_id_71>",
799
+ "single_word": false,
800
+ "lstrip": false,
801
+ "rstrip": false,
802
+ "normalized": false,
803
+ "special": true
804
+ },
805
+ {
806
+ "id": 898,
807
+ "content": "<extra_id_72>",
808
+ "single_word": false,
809
+ "lstrip": false,
810
+ "rstrip": false,
811
+ "normalized": false,
812
+ "special": true
813
+ },
814
+ {
815
+ "id": 899,
816
+ "content": "<extra_id_73>",
817
+ "single_word": false,
818
+ "lstrip": false,
819
+ "rstrip": false,
820
+ "normalized": false,
821
+ "special": true
822
+ },
823
+ {
824
+ "id": 900,
825
+ "content": "<extra_id_74>",
826
+ "single_word": false,
827
+ "lstrip": false,
828
+ "rstrip": false,
829
+ "normalized": false,
830
+ "special": true
831
+ },
832
+ {
833
+ "id": 901,
834
+ "content": "<extra_id_75>",
835
+ "single_word": false,
836
+ "lstrip": false,
837
+ "rstrip": false,
838
+ "normalized": false,
839
+ "special": true
840
+ },
841
+ {
842
+ "id": 902,
843
+ "content": "<extra_id_76>",
844
+ "single_word": false,
845
+ "lstrip": false,
846
+ "rstrip": false,
847
+ "normalized": false,
848
+ "special": true
849
+ },
850
+ {
851
+ "id": 903,
852
+ "content": "<extra_id_77>",
853
+ "single_word": false,
854
+ "lstrip": false,
855
+ "rstrip": false,
856
+ "normalized": false,
857
+ "special": true
858
+ },
859
+ {
860
+ "id": 904,
861
+ "content": "<extra_id_78>",
862
+ "single_word": false,
863
+ "lstrip": false,
864
+ "rstrip": false,
865
+ "normalized": false,
866
+ "special": true
867
+ },
868
+ {
869
+ "id": 905,
870
+ "content": "<extra_id_79>",
871
+ "single_word": false,
872
+ "lstrip": false,
873
+ "rstrip": false,
874
+ "normalized": false,
875
+ "special": true
876
+ },
877
+ {
878
+ "id": 906,
879
+ "content": "<extra_id_80>",
880
+ "single_word": false,
881
+ "lstrip": false,
882
+ "rstrip": false,
883
+ "normalized": false,
884
+ "special": true
885
+ },
886
+ {
887
+ "id": 907,
888
+ "content": "<extra_id_81>",
889
+ "single_word": false,
890
+ "lstrip": false,
891
+ "rstrip": false,
892
+ "normalized": false,
893
+ "special": true
894
+ },
895
+ {
896
+ "id": 908,
897
+ "content": "<extra_id_82>",
898
+ "single_word": false,
899
+ "lstrip": false,
900
+ "rstrip": false,
901
+ "normalized": false,
902
+ "special": true
903
+ },
904
+ {
905
+ "id": 909,
906
+ "content": "<extra_id_83>",
907
+ "single_word": false,
908
+ "lstrip": false,
909
+ "rstrip": false,
910
+ "normalized": false,
911
+ "special": true
912
+ },
913
+ {
914
+ "id": 910,
915
+ "content": "<extra_id_84>",
916
+ "single_word": false,
917
+ "lstrip": false,
918
+ "rstrip": false,
919
+ "normalized": false,
920
+ "special": true
921
+ },
922
+ {
923
+ "id": 911,
924
+ "content": "<extra_id_85>",
925
+ "single_word": false,
926
+ "lstrip": false,
927
+ "rstrip": false,
928
+ "normalized": false,
929
+ "special": true
930
+ },
931
+ {
932
+ "id": 912,
933
+ "content": "<extra_id_86>",
934
+ "single_word": false,
935
+ "lstrip": false,
936
+ "rstrip": false,
937
+ "normalized": false,
938
+ "special": true
939
+ },
940
+ {
941
+ "id": 913,
942
+ "content": "<extra_id_87>",
943
+ "single_word": false,
944
+ "lstrip": false,
945
+ "rstrip": false,
946
+ "normalized": false,
947
+ "special": true
948
+ },
949
+ {
950
+ "id": 914,
951
+ "content": "<extra_id_88>",
952
+ "single_word": false,
953
+ "lstrip": false,
954
+ "rstrip": false,
955
+ "normalized": false,
956
+ "special": true
957
+ },
958
+ {
959
+ "id": 915,
960
+ "content": "<extra_id_89>",
961
+ "single_word": false,
962
+ "lstrip": false,
963
+ "rstrip": false,
964
+ "normalized": false,
965
+ "special": true
966
+ },
967
+ {
968
+ "id": 916,
969
+ "content": "<extra_id_90>",
970
+ "single_word": false,
971
+ "lstrip": false,
972
+ "rstrip": false,
973
+ "normalized": false,
974
+ "special": true
975
+ },
976
+ {
977
+ "id": 917,
978
+ "content": "<extra_id_91>",
979
+ "single_word": false,
980
+ "lstrip": false,
981
+ "rstrip": false,
982
+ "normalized": false,
983
+ "special": true
984
+ },
985
+ {
986
+ "id": 918,
987
+ "content": "<extra_id_92>",
988
+ "single_word": false,
989
+ "lstrip": false,
990
+ "rstrip": false,
991
+ "normalized": false,
992
+ "special": true
993
+ },
994
+ {
995
+ "id": 919,
996
+ "content": "<extra_id_93>",
997
+ "single_word": false,
998
+ "lstrip": false,
999
+ "rstrip": false,
1000
+ "normalized": false,
1001
+ "special": true
1002
+ },
1003
+ {
1004
+ "id": 920,
1005
+ "content": "<extra_id_94>",
1006
+ "single_word": false,
1007
+ "lstrip": false,
1008
+ "rstrip": false,
1009
+ "normalized": false,
1010
+ "special": true
1011
+ },
1012
+ {
1013
+ "id": 921,
1014
+ "content": "<extra_id_95>",
1015
+ "single_word": false,
1016
+ "lstrip": false,
1017
+ "rstrip": false,
1018
+ "normalized": false,
1019
+ "special": true
1020
+ },
1021
+ {
1022
+ "id": 922,
1023
+ "content": "<extra_id_96>",
1024
+ "single_word": false,
1025
+ "lstrip": false,
1026
+ "rstrip": false,
1027
+ "normalized": false,
1028
+ "special": true
1029
+ },
1030
+ {
1031
+ "id": 923,
1032
+ "content": "<extra_id_97>",
1033
+ "single_word": false,
1034
+ "lstrip": false,
1035
+ "rstrip": false,
1036
+ "normalized": false,
1037
+ "special": true
1038
+ },
1039
+ {
1040
+ "id": 924,
1041
+ "content": "<extra_id_98>",
1042
+ "single_word": false,
1043
+ "lstrip": false,
1044
+ "rstrip": false,
1045
+ "normalized": false,
1046
+ "special": true
1047
+ },
1048
+ {
1049
+ "id": 925,
1050
+ "content": "<extra_id_99>",
1051
+ "single_word": false,
1052
+ "lstrip": false,
1053
+ "rstrip": false,
1054
+ "normalized": false,
1055
+ "special": true
1056
+ }
1057
+ ],
1058
+ "normalizer": {
1059
+ "type": "NFKC"
1060
+ },
1061
+ "pre_tokenizer": {
1062
+ "type": "Metaspace",
1063
+ "replacement": "▁",
1064
+ "prepend_scheme": "never",
1065
+ "split": true
1066
+ },
1067
+ "post_processor": null,
1068
+ "decoder": {
1069
+ "type": "Metaspace",
1070
+ "replacement": "▁",
1071
+ "prepend_scheme": "never",
1072
+ "split": true
1073
+ },
1074
+ "model": {
1075
+ "type": "BPE",
1076
+ "dropout": null,
1077
+ "unk_token": "<unk>",
1078
+ "continuing_subword_prefix": null,
1079
+ "end_of_word_suffix": null,
1080
+ "fuse_unk": false,
1081
+ "byte_fallback": false,
1082
+ "ignore_merges": false,
1083
+ "vocab": {
1084
+ "<unk>": 0,
1085
+ "0": 1,
1086
+ "1": 2,
1087
+ "2": 3,
1088
+ "3": 4,
1089
+ "4": 5,
1090
+ "5": 6,
1091
+ "6": 7,
1092
+ "7": 8,
1093
+ "8": 9,
1094
+ "9": 10,
1095
+ "<": 11,
1096
+ ">": 12,
1097
+ "a": 13,
1098
+ "b": 14,
1099
+ "c": 15,
1100
+ "d": 16,
1101
+ "e": 17,
1102
+ "f": 18,
1103
+ "g": 19,
1104
+ "h": 20,
1105
+ "i": 21,
1106
+ "j": 22,
1107
+ "k": 23,
1108
+ "l": 24,
1109
+ "m": 25,
1110
+ "n": 26,
1111
+ "o": 27,
1112
+ "p": 28,
1113
+ "q": 29,
1114
+ "r": 30,
1115
+ "s": 31,
1116
+ "t": 32,
1117
+ "u": 33,
1118
+ "v": 34,
1119
+ "w": 35,
1120
+ "x": 36,
1121
+ "y": 37,
1122
+ "z": 38,
1123
+ "á": 39,
1124
+ "è": 40,
1125
+ "é": 41,
1126
+ "؁": 42,
1127
+ "،": 43,
1128
+ "ؐ": 44,
1129
+ "ؑ": 45,
1130
+ "ؓ": 46,
1131
+ "ء": 47,
1132
+ "آ": 48,
1133
+ "ؤ": 49,
1134
+ "ئ": 50,
1135
+ "ا": 51,
1136
+ "ب": 52,
1137
+ "ة": 53,
1138
+ "ت": 54,
1139
+ "ث": 55,
1140
+ "ج": 56,
1141
+ "ح": 57,
1142
+ "خ": 58,
1143
+ "د": 59,
1144
+ "ذ": 60,
1145
+ "ر": 61,
1146
+ "ز": 62,
1147
+ "س": 63,
1148
+ "ش": 64,
1149
+ "ص": 65,
1150
+ "ض": 66,
1151
+ "ط": 67,
1152
+ "ظ": 68,
1153
+ "ع": 69,
1154
+ "غ": 70,
1155
+ "ـ": 71,
1156
+ "ف": 72,
1157
+ "ق": 73,
1158
+ "ك": 74,
1159
+ "ل": 75,
1160
+ "م": 76,
1161
+ "ن": 77,
1162
+ "ه": 78,
1163
+ "و": 79,
1164
+ "ي": 80,
1165
+ "ً": 81,
1166
+ "ٌ": 82,
1167
+ "َ": 83,
1168
+ "ُ": 84,
1169
+ "ِ": 85,
1170
+ "ّ": 86,
1171
+ "ٓ": 87,
1172
+ "ٔ": 88,
1173
+ "ٕ": 89,
1174
+ "ٖ": 90,
1175
+ "ٗ": 91,
1176
+ "٘": 92,
1177
+ "ٚ": 93,
1178
+ "ٛ": 94,
1179
+ "٠": 95,
1180
+ "١": 96,
1181
+ "٢": 97,
1182
+ "٣": 98,
1183
+ "٤": 99,
1184
+ "٥": 100,
1185
+ "٦": 101,
1186
+ "٧": 102,
1187
+ "٨": 103,
1188
+ "٩": 104,
1189
+ "ٮ": 105,
1190
+ "ٰ": 106,
1191
+ "ٲ": 107,
1192
+ "ٴ": 108,
1193
+ "ٹ": 109,
1194
+ "ٺ": 110,
1195
+ "ٻ": 111,
1196
+ "ٽ": 112,
1197
+ "پ": 113,
1198
+ "ٿ": 114,
1199
+ "ڀ": 115,
1200
+ "ڃ": 116,
1201
+ "ڄ": 117,
1202
+ "چ": 118,
1203
+ "ڇ": 119,
1204
+ "ڈ": 120,
1205
+ "ڊ": 121,
1206
+ "ڌ": 122,
1207
+ "ڍ": 123,
1208
+ "ڏ": 124,
1209
+ "ڑ": 125,
1210
+ "ژ": 126,
1211
+ "ڙ": 127,
1212
+ "ڦ": 128,
1213
+ "ک": 129,
1214
+ "ڪ": 130,
1215
+ "گ": 131,
1216
+ "ڱ": 132,
1217
+ "ڳ": 133,
1218
+ "ں": 134,
1219
+ "ڻ": 135,
1220
+ "ھ": 136,
1221
+ "ہ": 137,
1222
+ "ۂ": 138,
1223
+ "ۃ": 139,
1224
+ "ۄ": 140,
1225
+ "ی": 141,
1226
+ "ۍ": 142,
1227
+ "ے": 143,
1228
+ "ۓ": 144,
1229
+ "۪": 145,
1230
+ "ۭ": 146,
1231
+ "۶": 147,
1232
+ "۾": 148,
1233
+ "ँ": 149,
1234
+ "ं": 150,
1235
+ "ः": 151,
1236
+ "अ": 152,
1237
+ "आ": 153,
1238
+ "इ": 154,
1239
+ "ई": 155,
1240
+ "उ": 156,
1241
+ "ऊ": 157,
1242
+ "ऋ": 158,
1243
+ "ऌ": 159,
1244
+ "ऍ": 160,
1245
+ "ए": 161,
1246
+ "ऐ": 162,
1247
+ "ऑ": 163,
1248
+ "ओ": 164,
1249
+ "औ": 165,
1250
+ "क": 166,
1251
+ "ख": 167,
1252
+ "ग": 168,
1253
+ "घ": 169,
1254
+ "ङ": 170,
1255
+ "च": 171,
1256
+ "छ": 172,
1257
+ "ज": 173,
1258
+ "झ": 174,
1259
+ "ञ": 175,
1260
+ "ट": 176,
1261
+ "ठ": 177,
1262
+ "ड": 178,
1263
+ "ढ": 179,
1264
+ "ण": 180,
1265
+ "त": 181,
1266
+ "थ": 182,
1267
+ "द": 183,
1268
+ "ध": 184,
1269
+ "न": 185,
1270
+ "प": 186,
1271
+ "फ": 187,
1272
+ "ब": 188,
1273
+ "भ": 189,
1274
+ "म": 190,
1275
+ "य": 191,
1276
+ "र": 192,
1277
+ "ऱ": 193,
1278
+ "ल": 194,
1279
+ "ळ": 195,
1280
+ "व": 196,
1281
+ "श": 197,
1282
+ "ष": 198,
1283
+ "स": 199,
1284
+ "ह": 200,
1285
+ "़": 201,
1286
+ "ऽ": 202,
1287
+ "ा": 203,
1288
+ "ि": 204,
1289
+ "ी": 205,
1290
+ "ु": 206,
1291
+ "ू": 207,
1292
+ "ृ": 208,
1293
+ "ॅ": 209,
1294
+ "ॆ": 210,
1295
+ "े": 211,
1296
+ "ै": 212,
1297
+ "ॉ": 213,
1298
+ "ॊ": 214,
1299
+ "ो": 215,
1300
+ "ौ": 216,
1301
+ "्": 217,
1302
+ "ॐ": 218,
1303
+ "ॠ": 219,
1304
+ "।": 220,
1305
+ "॥": 221,
1306
+ "०": 222,
1307
+ "१": 223,
1308
+ "२": 224,
1309
+ "३": 225,
1310
+ "४": 226,
1311
+ "५": 227,
1312
+ "६": 228,
1313
+ "७": 229,
1314
+ "८": 230,
1315
+ "९": 231,
1316
+ "॰": 232,
1317
+ "ঁ": 233,
1318
+ "ং": 234,
1319
+ "ঃ": 235,
1320
+ "অ": 236,
1321
+ "আ": 237,
1322
+ "ই": 238,
1323
+ "ঈ": 239,
1324
+ "উ": 240,
1325
+ "ঊ": 241,
1326
+ "ঋ": 242,
1327
+ "এ": 243,
1328
+ "ঐ": 244,
1329
+ "ও": 245,
1330
+ "ঔ": 246,
1331
+ "ক": 247,
1332
+ "খ": 248,
1333
+ "গ": 249,
1334
+ "ঘ": 250,
1335
+ "ঙ": 251,
1336
+ "চ": 252,
1337
+ "ছ": 253,
1338
+ "জ": 254,
1339
+ "ঝ": 255,
1340
+ "ঞ": 256,
1341
+ "ট": 257,
1342
+ "ঠ": 258,
1343
+ "ড": 259,
1344
+ "ঢ": 260,
1345
+ "ণ": 261,
1346
+ "ত": 262,
1347
+ "থ": 263,
1348
+ "দ": 264,
1349
+ "ধ": 265,
1350
+ "ন": 266,
1351
+ "প": 267,
1352
+ "ফ": 268,
1353
+ "ব": 269,
1354
+ "ভ": 270,
1355
+ "ম": 271,
1356
+ "য": 272,
1357
+ "র": 273,
1358
+ "ল": 274,
1359
+ "শ": 275,
1360
+ "ষ": 276,
1361
+ "স": 277,
1362
+ "হ": 278,
1363
+ "়": 279,
1364
+ "া": 280,
1365
+ "ি": 281,
1366
+ "ী": 282,
1367
+ "ু": 283,
1368
+ "ূ": 284,
1369
+ "ৃ": 285,
1370
+ "ে": 286,
1371
+ "ৈ": 287,
1372
+ "ো": 288,
1373
+ "ৌ": 289,
1374
+ "্": 290,
1375
+ "ৎ": 291,
1376
+ "০": 292,
1377
+ "১": 293,
1378
+ "২": 294,
1379
+ "৩": 295,
1380
+ "৪": 296,
1381
+ "৫": 297,
1382
+ "৬": 298,
1383
+ "৭": 299,
1384
+ "৮": 300,
1385
+ "৯": 301,
1386
+ "ৰ": 302,
1387
+ "ৱ": 303,
1388
+ "ਂ": 304,
1389
+ "ਃ": 305,
1390
+ "ਅ": 306,
1391
+ "ਆ": 307,
1392
+ "ਇ": 308,
1393
+ "ਈ": 309,
1394
+ "ਉ": 310,
1395
+ "ਊ": 311,
1396
+ "ਏ": 312,
1397
+ "ਐ": 313,
1398
+ "ਓ": 314,
1399
+ "ਔ": 315,
1400
+ "ਕ": 316,
1401
+ "ਖ": 317,
1402
+ "ਗ": 318,
1403
+ "ਘ": 319,
1404
+ "ਙ": 320,
1405
+ "ਚ": 321,
1406
+ "ਛ": 322,
1407
+ "ਜ": 323,
1408
+ "ਝ": 324,
1409
+ "ਞ": 325,
1410
+ "ਟ": 326,
1411
+ "ਠ": 327,
1412
+ "ਡ": 328,
1413
+ "ਢ": 329,
1414
+ "ਣ": 330,
1415
+ "ਤ": 331,
1416
+ "ਥ": 332,
1417
+ "ਦ": 333,
1418
+ "ਧ": 334,
1419
+ "ਨ": 335,
1420
+ "ਪ": 336,
1421
+ "ਫ": 337,
1422
+ "ਬ": 338,
1423
+ "ਭ": 339,
1424
+ "ਮ": 340,
1425
+ "ਯ": 341,
1426
+ "ਰ": 342,
1427
+ "ਲ": 343,
1428
+ "ਵ": 344,
1429
+ "ਸ": 345,
1430
+ "ਹ": 346,
1431
+ "਼": 347,
1432
+ "ਾ": 348,
1433
+ "ਿ": 349,
1434
+ "ੀ": 350,
1435
+ "ੁ": 351,
1436
+ "ੂ": 352,
1437
+ "ੇ": 353,
1438
+ "ੈ": 354,
1439
+ "ੋ": 355,
1440
+ "ੌ": 356,
1441
+ "੍": 357,
1442
+ "ੜ": 358,
1443
+ "੦": 359,
1444
+ "੧": 360,
1445
+ "੨": 361,
1446
+ "੩": 362,
1447
+ "੪": 363,
1448
+ "੫": 364,
1449
+ "੬": 365,
1450
+ "੭": 366,
1451
+ "੮": 367,
1452
+ "੯": 368,
1453
+ "ੰ": 369,
1454
+ "ੱ": 370,
1455
+ "ੲ": 371,
1456
+ "ੳ": 372,
1457
+ "ઁ": 373,
1458
+ "ં": 374,
1459
+ "ઃ": 375,
1460
+ "અ": 376,
1461
+ "આ": 377,
1462
+ "ઇ": 378,
1463
+ "ઈ": 379,
1464
+ "ઉ": 380,
1465
+ "ઊ": 381,
1466
+ "ઋ": 382,
1467
+ "ઍ": 383,
1468
+ "એ": 384,
1469
+ "ઐ": 385,
1470
+ "ઑ": 386,
1471
+ "ઓ": 387,
1472
+ "ઔ": 388,
1473
+ "ક": 389,
1474
+ "ખ": 390,
1475
+ "ગ": 391,
1476
+ "ઘ": 392,
1477
+ "ઙ": 393,
1478
+ "ચ": 394,
1479
+ "છ": 395,
1480
+ "જ": 396,
1481
+ "ઝ": 397,
1482
+ "ઞ": 398,
1483
+ "ટ": 399,
1484
+ "ઠ": 400,
1485
+ "ડ": 401,
1486
+ "ઢ": 402,
1487
+ "ણ": 403,
1488
+ "ત": 404,
1489
+ "થ": 405,
1490
+ "દ": 406,
1491
+ "ધ": 407,
1492
+ "ન": 408,
1493
+ "પ": 409,
1494
+ "ફ": 410,
1495
+ "બ": 411,
1496
+ "ભ": 412,
1497
+ "મ": 413,
1498
+ "ય": 414,
1499
+ "ર": 415,
1500
+ "લ": 416,
1501
+ "ળ": 417,
1502
+ "વ": 418,
1503
+ "શ": 419,
1504
+ "ષ": 420,
1505
+ "સ": 421,
1506
+ "હ": 422,
1507
+ "઼": 423,
1508
+ "ા": 424,
1509
+ "િ": 425,
1510
+ "ી": 426,
1511
+ "ુ": 427,
1512
+ "ૂ": 428,
1513
+ "ૃ": 429,
1514
+ "ૅ": 430,
1515
+ "ે": 431,
1516
+ "ૈ": 432,
1517
+ "ૉ": 433,
1518
+ "ો": 434,
1519
+ "ૌ": 435,
1520
+ "્": 436,
1521
+ "ૐ": 437,
1522
+ "ૠ": 438,
1523
+ "૦": 439,
1524
+ "૧": 440,
1525
+ "૨": 441,
1526
+ "૩": 442,
1527
+ "૪": 443,
1528
+ "૫": 444,
1529
+ "૬": 445,
1530
+ "૭": 446,
1531
+ "૮": 447,
1532
+ "૯": 448,
1533
+ "૰": 449,
1534
+ "ଁ": 450,
1535
+ "ଂ": 451,
1536
+ "ଃ": 452,
1537
+ "ଅ": 453,
1538
+ "ଆ": 454,
1539
+ "ଇ": 455,
1540
+ "ଈ": 456,
1541
+ "ଉ": 457,
1542
+ "ଊ": 458,
1543
+ "ଋ": 459,
1544
+ "ଏ": 460,
1545
+ "ଐ": 461,
1546
+ "ଓ": 462,
1547
+ "ଔ": 463,
1548
+ "କ": 464,
1549
+ "ଖ": 465,
1550
+ "ଗ": 466,
1551
+ "ଘ": 467,
1552
+ "ଙ": 468,
1553
+ "ଚ": 469,
1554
+ "ଛ": 470,
1555
+ "ଜ": 471,
1556
+ "ଝ": 472,
1557
+ "ଞ": 473,
1558
+ "ଟ": 474,
1559
+ "ଠ": 475,
1560
+ "ଡ": 476,
1561
+ "ଢ": 477,
1562
+ "ଣ": 478,
1563
+ "ତ": 479,
1564
+ "ଥ": 480,
1565
+ "ଦ": 481,
1566
+ "ଧ": 482,
1567
+ "ନ": 483,
1568
+ "ପ": 484,
1569
+ "ଫ": 485,
1570
+ "ବ": 486,
1571
+ "ଭ": 487,
1572
+ "ମ": 488,
1573
+ "ଯ": 489,
1574
+ "ର": 490,
1575
+ "ଲ": 491,
1576
+ "ଳ": 492,
1577
+ "ଶ": 493,
1578
+ "ଷ": 494,
1579
+ "ସ": 495,
1580
+ "ହ": 496,
1581
+ "଼": 497,
1582
+ "ା": 498,
1583
+ "ି": 499,
1584
+ "ୀ": 500,
1585
+ "ୁ": 501,
1586
+ "ୂ": 502,
1587
+ "ୃ": 503,
1588
+ "େ": 504,
1589
+ "ୈ": 505,
1590
+ "ୋ": 506,
1591
+ "ୌ": 507,
1592
+ "୍": 508,
1593
+ "୘": 509,
1594
+ "ୟ": 510,
1595
+ "ୢ": 511,
1596
+ "୦": 512,
1597
+ "୧": 513,
1598
+ "୨": 514,
1599
+ "୩": 515,
1600
+ "୪": 516,
1601
+ "୫": 517,
1602
+ "୬": 518,
1603
+ "୭": 519,
1604
+ "୮": 520,
1605
+ "୯": 521,
1606
+ "ୱ": 522,
1607
+ "ஂ": 523,
1608
+ "ஃ": 524,
1609
+ "அ": 525,
1610
+ "ஆ": 526,
1611
+ "இ": 527,
1612
+ "ஈ": 528,
1613
+ "உ": 529,
1614
+ "ஊ": 530,
1615
+ "எ": 531,
1616
+ "ஏ": 532,
1617
+ "ஐ": 533,
1618
+ "ஒ": 534,
1619
+ "ஓ": 535,
1620
+ "ஔ": 536,
1621
+ "க": 537,
1622
+ "ங": 538,
1623
+ "ச": 539,
1624
+ "ஜ": 540,
1625
+ "ஞ": 541,
1626
+ "ட": 542,
1627
+ "ண": 543,
1628
+ "த": 544,
1629
+ "ந": 545,
1630
+ "ன": 546,
1631
+ "ப": 547,
1632
+ "ம": 548,
1633
+ "ய": 549,
1634
+ "ர": 550,
1635
+ "ற": 551,
1636
+ "ல": 552,
1637
+ "ள": 553,
1638
+ "ழ": 554,
1639
+ "வ": 555,
1640
+ "ஷ": 556,
1641
+ "ஸ": 557,
1642
+ "ஹ": 558,
1643
+ "ா": 559,
1644
+ "ி": 560,
1645
+ "ீ": 561,
1646
+ "ு": 562,
1647
+ "ூ": 563,
1648
+ "ெ": 564,
1649
+ "ே": 565,
1650
+ "ை": 566,
1651
+ "ொ": 567,
1652
+ "ோ": 568,
1653
+ "ௌ": 569,
1654
+ "்": 570,
1655
+ "௦": 571,
1656
+ "௧": 572,
1657
+ "௨": 573,
1658
+ "௩": 574,
1659
+ "௪": 575,
1660
+ "௫": 576,
1661
+ "௬": 577,
1662
+ "௭": 578,
1663
+ "௮": 579,
1664
+ "௯": 580,
1665
+ "ఁ": 581,
1666
+ "ం": 582,
1667
+ "ః": 583,
1668
+ "అ": 584,
1669
+ "ఆ": 585,
1670
+ "ఇ": 586,
1671
+ "ఈ": 587,
1672
+ "ఉ": 588,
1673
+ "ఊ": 589,
1674
+ "ఋ": 590,
1675
+ "ఎ": 591,
1676
+ "ఏ": 592,
1677
+ "ఐ": 593,
1678
+ "ఒ": 594,
1679
+ "ఓ": 595,
1680
+ "ఔ": 596,
1681
+ "క": 597,
1682
+ "ఖ": 598,
1683
+ "గ": 599,
1684
+ "ఘ": 600,
1685
+ "ఙ": 601,
1686
+ "చ": 602,
1687
+ "ఛ": 603,
1688
+ "జ": 604,
1689
+ "ఝ": 605,
1690
+ "ఞ": 606,
1691
+ "ట": 607,
1692
+ "ఠ": 608,
1693
+ "డ": 609,
1694
+ "ఢ": 610,
1695
+ "ణ": 611,
1696
+ "త": 612,
1697
+ "థ": 613,
1698
+ "ద": 614,
1699
+ "ధ": 615,
1700
+ "న": 616,
1701
+ "ప": 617,
1702
+ "ఫ": 618,
1703
+ "బ": 619,
1704
+ "భ": 620,
1705
+ "మ": 621,
1706
+ "య": 622,
1707
+ "ర": 623,
1708
+ "ఱ": 624,
1709
+ "ల": 625,
1710
+ "ళ": 626,
1711
+ "వ": 627,
1712
+ "శ": 628,
1713
+ "ష": 629,
1714
+ "స": 630,
1715
+ "హ": 631,
1716
+ "ా": 632,
1717
+ "ి": 633,
1718
+ "ీ": 634,
1719
+ "ు": 635,
1720
+ "ూ": 636,
1721
+ "ృ": 637,
1722
+ "ౄ": 638,
1723
+ "ె": 639,
1724
+ "ే": 640,
1725
+ "ై": 641,
1726
+ "ొ": 642,
1727
+ "ో": 643,
1728
+ "ౌ": 644,
1729
+ "్": 645,
1730
+ "౦": 646,
1731
+ "౧": 647,
1732
+ "౨": 648,
1733
+ "౩": 649,
1734
+ "౪": 650,
1735
+ "౫": 651,
1736
+ "౬": 652,
1737
+ "౭": 653,
1738
+ "౮": 654,
1739
+ "౯": 655,
1740
+ "ಂ": 656,
1741
+ "ಃ": 657,
1742
+ "ಅ": 658,
1743
+ "ಆ": 659,
1744
+ "ಇ": 660,
1745
+ "ಈ": 661,
1746
+ "ಉ": 662,
1747
+ "ಊ": 663,
1748
+ "ಋ": 664,
1749
+ "ಎ": 665,
1750
+ "ಏ": 666,
1751
+ "ಐ": 667,
1752
+ "ಒ": 668,
1753
+ "ಓ": 669,
1754
+ "ಔ": 670,
1755
+ "ಕ": 671,
1756
+ "ಖ": 672,
1757
+ "ಗ": 673,
1758
+ "ಘ": 674,
1759
+ "ಚ": 675,
1760
+ "ಛ": 676,
1761
+ "ಜ": 677,
1762
+ "ಝ": 678,
1763
+ "ಞ": 679,
1764
+ "ಟ": 680,
1765
+ "ಠ": 681,
1766
+ "ಡ": 682,
1767
+ "ಢ": 683,
1768
+ "ಣ": 684,
1769
+ "ತ": 685,
1770
+ "ಥ": 686,
1771
+ "ದ": 687,
1772
+ "ಧ": 688,
1773
+ "ನ": 689,
1774
+ "ಪ": 690,
1775
+ "ಫ": 691,
1776
+ "ಬ": 692,
1777
+ "ಭ": 693,
1778
+ "ಮ": 694,
1779
+ "ಯ": 695,
1780
+ "ರ": 696,
1781
+ "ಲ": 697,
1782
+ "ಳ": 698,
1783
+ "ವ": 699,
1784
+ "ಶ": 700,
1785
+ "ಷ": 701,
1786
+ "ಸ": 702,
1787
+ "ಹ": 703,
1788
+ "಼": 704,
1789
+ "ಾ": 705,
1790
+ "ಿ": 706,
1791
+ "ೀ": 707,
1792
+ "ು": 708,
1793
+ "ೂ": 709,
1794
+ "ೃ": 710,
1795
+ "ೆ": 711,
1796
+ "ೇ": 712,
1797
+ "ೈ": 713,
1798
+ "ೊ": 714,
1799
+ "ೋ": 715,
1800
+ "ೌ": 716,
1801
+ "್": 717,
1802
+ "೦": 718,
1803
+ "೧": 719,
1804
+ "೨": 720,
1805
+ "೩": 721,
1806
+ "೪": 722,
1807
+ "೫": 723,
1808
+ "೬": 724,
1809
+ "೭": 725,
1810
+ "೮": 726,
1811
+ "೯": 727,
1812
+ "ം": 728,
1813
+ "ഃ": 729,
1814
+ "അ": 730,
1815
+ "ആ": 731,
1816
+ "ഇ": 732,
1817
+ "ഈ": 733,
1818
+ "ഉ": 734,
1819
+ "ഊ": 735,
1820
+ "ഋ": 736,
1821
+ "എ": 737,
1822
+ "ഏ": 738,
1823
+ "ഐ": 739,
1824
+ "ഒ": 740,
1825
+ "ഓ": 741,
1826
+ "ഔ": 742,
1827
+ "ക": 743,
1828
+ "ഖ": 744,
1829
+ "ഗ": 745,
1830
+ "ഘ": 746,
1831
+ "ങ": 747,
1832
+ "ച": 748,
1833
+ "ഛ": 749,
1834
+ "ജ": 750,
1835
+ "ഝ": 751,
1836
+ "ഞ": 752,
1837
+ "ട": 753,
1838
+ "ഠ": 754,
1839
+ "ഡ": 755,
1840
+ "ഢ": 756,
1841
+ "ണ": 757,
1842
+ "ത": 758,
1843
+ "ഥ": 759,
1844
+ "ദ": 760,
1845
+ "ധ": 761,
1846
+ "ന": 762,
1847
+ "പ": 763,
1848
+ "ഫ": 764,
1849
+ "ബ": 765,
1850
+ "ഭ": 766,
1851
+ "മ": 767,
1852
+ "യ": 768,
1853
+ "ര": 769,
1854
+ "റ": 770,
1855
+ "ല": 771,
1856
+ "ള": 772,
1857
+ "ഴ": 773,
1858
+ "വ": 774,
1859
+ "ശ": 775,
1860
+ "ഷ": 776,
1861
+ "സ": 777,
1862
+ "ഹ": 778,
1863
+ "ാ": 779,
1864
+ "ി": 780,
1865
+ "ീ": 781,
1866
+ "ു": 782,
1867
+ "ൂ": 783,
1868
+ "ൃ": 784,
1869
+ "െ": 785,
1870
+ "േ": 786,
1871
+ "ൈ": 787,
1872
+ "ൊ": 788,
1873
+ "ോ": 789,
1874
+ "ൌ": 790,
1875
+ "്": 791,
1876
+ "ൗ": 792,
1877
+ "൦": 793,
1878
+ "൧": 794,
1879
+ "൨": 795,
1880
+ "൩": 796,
1881
+ "൪": 797,
1882
+ "൫": 798,
1883
+ "൬": 799,
1884
+ "൭": 800,
1885
+ "൮": 801,
1886
+ "൯": 802,
1887
+ "ൺ": 803,
1888
+ "ൻ": 804,
1889
+ "ർ": 805,
1890
+ "ൽ": 806,
1891
+ "ൾ": 807,
1892
+ "‌": 808,
1893
+ "‍": 809,
1894
+ "▁": 810
1895
+ },
1896
+ "merges": []
1897
+ }
1898
+ }
tokenizer_config.json ADDED
@@ -0,0 +1,1054 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "811": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "812": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ },
27
+ "813": {
28
+ "content": "<pad>",
29
+ "lstrip": false,
30
+ "normalized": false,
31
+ "rstrip": false,
32
+ "single_word": false,
33
+ "special": true
34
+ },
35
+ "814": {
36
+ "content": "<arb>",
37
+ "lstrip": false,
38
+ "normalized": false,
39
+ "rstrip": false,
40
+ "single_word": false,
41
+ "special": true
42
+ },
43
+ "815": {
44
+ "content": "<asm>",
45
+ "lstrip": false,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
+ },
51
+ "816": {
52
+ "content": "<ben>",
53
+ "lstrip": false,
54
+ "normalized": false,
55
+ "rstrip": false,
56
+ "single_word": false,
57
+ "special": true
58
+ },
59
+ "817": {
60
+ "content": "<guj>",
61
+ "lstrip": false,
62
+ "normalized": false,
63
+ "rstrip": false,
64
+ "single_word": false,
65
+ "special": true
66
+ },
67
+ "818": {
68
+ "content": "<hin>",
69
+ "lstrip": false,
70
+ "normalized": false,
71
+ "rstrip": false,
72
+ "single_word": false,
73
+ "special": true
74
+ },
75
+ "819": {
76
+ "content": "<kan>",
77
+ "lstrip": false,
78
+ "normalized": false,
79
+ "rstrip": false,
80
+ "single_word": false,
81
+ "special": true
82
+ },
83
+ "820": {
84
+ "content": "<mal>",
85
+ "lstrip": false,
86
+ "normalized": false,
87
+ "rstrip": false,
88
+ "single_word": false,
89
+ "special": true
90
+ },
91
+ "821": {
92
+ "content": "<mar>",
93
+ "lstrip": false,
94
+ "normalized": false,
95
+ "rstrip": false,
96
+ "single_word": false,
97
+ "special": true
98
+ },
99
+ "822": {
100
+ "content": "<ori>",
101
+ "lstrip": false,
102
+ "normalized": false,
103
+ "rstrip": false,
104
+ "single_word": false,
105
+ "special": true
106
+ },
107
+ "823": {
108
+ "content": "<pan>",
109
+ "lstrip": false,
110
+ "normalized": false,
111
+ "rstrip": false,
112
+ "single_word": false,
113
+ "special": true
114
+ },
115
+ "824": {
116
+ "content": "<tam>",
117
+ "lstrip": false,
118
+ "normalized": false,
119
+ "rstrip": false,
120
+ "single_word": false,
121
+ "special": true
122
+ },
123
+ "825": {
124
+ "content": "<tel>",
125
+ "lstrip": false,
126
+ "normalized": false,
127
+ "rstrip": false,
128
+ "single_word": false,
129
+ "special": true
130
+ },
131
+ "826": {
132
+ "content": "<extra_id_0>",
133
+ "lstrip": false,
134
+ "normalized": false,
135
+ "rstrip": false,
136
+ "single_word": false,
137
+ "special": true
138
+ },
139
+ "827": {
140
+ "content": "<extra_id_1>",
141
+ "lstrip": false,
142
+ "normalized": false,
143
+ "rstrip": false,
144
+ "single_word": false,
145
+ "special": true
146
+ },
147
+ "828": {
148
+ "content": "<extra_id_2>",
149
+ "lstrip": false,
150
+ "normalized": false,
151
+ "rstrip": false,
152
+ "single_word": false,
153
+ "special": true
154
+ },
155
+ "829": {
156
+ "content": "<extra_id_3>",
157
+ "lstrip": false,
158
+ "normalized": false,
159
+ "rstrip": false,
160
+ "single_word": false,
161
+ "special": true
162
+ },
163
+ "830": {
164
+ "content": "<extra_id_4>",
165
+ "lstrip": false,
166
+ "normalized": false,
167
+ "rstrip": false,
168
+ "single_word": false,
169
+ "special": true
170
+ },
171
+ "831": {
172
+ "content": "<extra_id_5>",
173
+ "lstrip": false,
174
+ "normalized": false,
175
+ "rstrip": false,
176
+ "single_word": false,
177
+ "special": true
178
+ },
179
+ "832": {
180
+ "content": "<extra_id_6>",
181
+ "lstrip": false,
182
+ "normalized": false,
183
+ "rstrip": false,
184
+ "single_word": false,
185
+ "special": true
186
+ },
187
+ "833": {
188
+ "content": "<extra_id_7>",
189
+ "lstrip": false,
190
+ "normalized": false,
191
+ "rstrip": false,
192
+ "single_word": false,
193
+ "special": true
194
+ },
195
+ "834": {
196
+ "content": "<extra_id_8>",
197
+ "lstrip": false,
198
+ "normalized": false,
199
+ "rstrip": false,
200
+ "single_word": false,
201
+ "special": true
202
+ },
203
+ "835": {
204
+ "content": "<extra_id_9>",
205
+ "lstrip": false,
206
+ "normalized": false,
207
+ "rstrip": false,
208
+ "single_word": false,
209
+ "special": true
210
+ },
211
+ "836": {
212
+ "content": "<extra_id_10>",
213
+ "lstrip": false,
214
+ "normalized": false,
215
+ "rstrip": false,
216
+ "single_word": false,
217
+ "special": true
218
+ },
219
+ "837": {
220
+ "content": "<extra_id_11>",
221
+ "lstrip": false,
222
+ "normalized": false,
223
+ "rstrip": false,
224
+ "single_word": false,
225
+ "special": true
226
+ },
227
+ "838": {
228
+ "content": "<extra_id_12>",
229
+ "lstrip": false,
230
+ "normalized": false,
231
+ "rstrip": false,
232
+ "single_word": false,
233
+ "special": true
234
+ },
235
+ "839": {
236
+ "content": "<extra_id_13>",
237
+ "lstrip": false,
238
+ "normalized": false,
239
+ "rstrip": false,
240
+ "single_word": false,
241
+ "special": true
242
+ },
243
+ "840": {
244
+ "content": "<extra_id_14>",
245
+ "lstrip": false,
246
+ "normalized": false,
247
+ "rstrip": false,
248
+ "single_word": false,
249
+ "special": true
250
+ },
251
+ "841": {
252
+ "content": "<extra_id_15>",
253
+ "lstrip": false,
254
+ "normalized": false,
255
+ "rstrip": false,
256
+ "single_word": false,
257
+ "special": true
258
+ },
259
+ "842": {
260
+ "content": "<extra_id_16>",
261
+ "lstrip": false,
262
+ "normalized": false,
263
+ "rstrip": false,
264
+ "single_word": false,
265
+ "special": true
266
+ },
267
+ "843": {
268
+ "content": "<extra_id_17>",
269
+ "lstrip": false,
270
+ "normalized": false,
271
+ "rstrip": false,
272
+ "single_word": false,
273
+ "special": true
274
+ },
275
+ "844": {
276
+ "content": "<extra_id_18>",
277
+ "lstrip": false,
278
+ "normalized": false,
279
+ "rstrip": false,
280
+ "single_word": false,
281
+ "special": true
282
+ },
283
+ "845": {
284
+ "content": "<extra_id_19>",
285
+ "lstrip": false,
286
+ "normalized": false,
287
+ "rstrip": false,
288
+ "single_word": false,
289
+ "special": true
290
+ },
291
+ "846": {
292
+ "content": "<extra_id_20>",
293
+ "lstrip": false,
294
+ "normalized": false,
295
+ "rstrip": false,
296
+ "single_word": false,
297
+ "special": true
298
+ },
299
+ "847": {
300
+ "content": "<extra_id_21>",
301
+ "lstrip": false,
302
+ "normalized": false,
303
+ "rstrip": false,
304
+ "single_word": false,
305
+ "special": true
306
+ },
307
+ "848": {
308
+ "content": "<extra_id_22>",
309
+ "lstrip": false,
310
+ "normalized": false,
311
+ "rstrip": false,
312
+ "single_word": false,
313
+ "special": true
314
+ },
315
+ "849": {
316
+ "content": "<extra_id_23>",
317
+ "lstrip": false,
318
+ "normalized": false,
319
+ "rstrip": false,
320
+ "single_word": false,
321
+ "special": true
322
+ },
323
+ "850": {
324
+ "content": "<extra_id_24>",
325
+ "lstrip": false,
326
+ "normalized": false,
327
+ "rstrip": false,
328
+ "single_word": false,
329
+ "special": true
330
+ },
331
+ "851": {
332
+ "content": "<extra_id_25>",
333
+ "lstrip": false,
334
+ "normalized": false,
335
+ "rstrip": false,
336
+ "single_word": false,
337
+ "special": true
338
+ },
339
+ "852": {
340
+ "content": "<extra_id_26>",
341
+ "lstrip": false,
342
+ "normalized": false,
343
+ "rstrip": false,
344
+ "single_word": false,
345
+ "special": true
346
+ },
347
+ "853": {
348
+ "content": "<extra_id_27>",
349
+ "lstrip": false,
350
+ "normalized": false,
351
+ "rstrip": false,
352
+ "single_word": false,
353
+ "special": true
354
+ },
355
+ "854": {
356
+ "content": "<extra_id_28>",
357
+ "lstrip": false,
358
+ "normalized": false,
359
+ "rstrip": false,
360
+ "single_word": false,
361
+ "special": true
362
+ },
363
+ "855": {
364
+ "content": "<extra_id_29>",
365
+ "lstrip": false,
366
+ "normalized": false,
367
+ "rstrip": false,
368
+ "single_word": false,
369
+ "special": true
370
+ },
371
+ "856": {
372
+ "content": "<extra_id_30>",
373
+ "lstrip": false,
374
+ "normalized": false,
375
+ "rstrip": false,
376
+ "single_word": false,
377
+ "special": true
378
+ },
379
+ "857": {
380
+ "content": "<extra_id_31>",
381
+ "lstrip": false,
382
+ "normalized": false,
383
+ "rstrip": false,
384
+ "single_word": false,
385
+ "special": true
386
+ },
387
+ "858": {
388
+ "content": "<extra_id_32>",
389
+ "lstrip": false,
390
+ "normalized": false,
391
+ "rstrip": false,
392
+ "single_word": false,
393
+ "special": true
394
+ },
395
+ "859": {
396
+ "content": "<extra_id_33>",
397
+ "lstrip": false,
398
+ "normalized": false,
399
+ "rstrip": false,
400
+ "single_word": false,
401
+ "special": true
402
+ },
403
+ "860": {
404
+ "content": "<extra_id_34>",
405
+ "lstrip": false,
406
+ "normalized": false,
407
+ "rstrip": false,
408
+ "single_word": false,
409
+ "special": true
410
+ },
411
+ "861": {
412
+ "content": "<extra_id_35>",
413
+ "lstrip": false,
414
+ "normalized": false,
415
+ "rstrip": false,
416
+ "single_word": false,
417
+ "special": true
418
+ },
419
+ "862": {
420
+ "content": "<extra_id_36>",
421
+ "lstrip": false,
422
+ "normalized": false,
423
+ "rstrip": false,
424
+ "single_word": false,
425
+ "special": true
426
+ },
427
+ "863": {
428
+ "content": "<extra_id_37>",
429
+ "lstrip": false,
430
+ "normalized": false,
431
+ "rstrip": false,
432
+ "single_word": false,
433
+ "special": true
434
+ },
435
+ "864": {
436
+ "content": "<extra_id_38>",
437
+ "lstrip": false,
438
+ "normalized": false,
439
+ "rstrip": false,
440
+ "single_word": false,
441
+ "special": true
442
+ },
443
+ "865": {
444
+ "content": "<extra_id_39>",
445
+ "lstrip": false,
446
+ "normalized": false,
447
+ "rstrip": false,
448
+ "single_word": false,
449
+ "special": true
450
+ },
451
+ "866": {
452
+ "content": "<extra_id_40>",
453
+ "lstrip": false,
454
+ "normalized": false,
455
+ "rstrip": false,
456
+ "single_word": false,
457
+ "special": true
458
+ },
459
+ "867": {
460
+ "content": "<extra_id_41>",
461
+ "lstrip": false,
462
+ "normalized": false,
463
+ "rstrip": false,
464
+ "single_word": false,
465
+ "special": true
466
+ },
467
+ "868": {
468
+ "content": "<extra_id_42>",
469
+ "lstrip": false,
470
+ "normalized": false,
471
+ "rstrip": false,
472
+ "single_word": false,
473
+ "special": true
474
+ },
475
+ "869": {
476
+ "content": "<extra_id_43>",
477
+ "lstrip": false,
478
+ "normalized": false,
479
+ "rstrip": false,
480
+ "single_word": false,
481
+ "special": true
482
+ },
483
+ "870": {
484
+ "content": "<extra_id_44>",
485
+ "lstrip": false,
486
+ "normalized": false,
487
+ "rstrip": false,
488
+ "single_word": false,
489
+ "special": true
490
+ },
491
+ "871": {
492
+ "content": "<extra_id_45>",
493
+ "lstrip": false,
494
+ "normalized": false,
495
+ "rstrip": false,
496
+ "single_word": false,
497
+ "special": true
498
+ },
499
+ "872": {
500
+ "content": "<extra_id_46>",
501
+ "lstrip": false,
502
+ "normalized": false,
503
+ "rstrip": false,
504
+ "single_word": false,
505
+ "special": true
506
+ },
507
+ "873": {
508
+ "content": "<extra_id_47>",
509
+ "lstrip": false,
510
+ "normalized": false,
511
+ "rstrip": false,
512
+ "single_word": false,
513
+ "special": true
514
+ },
515
+ "874": {
516
+ "content": "<extra_id_48>",
517
+ "lstrip": false,
518
+ "normalized": false,
519
+ "rstrip": false,
520
+ "single_word": false,
521
+ "special": true
522
+ },
523
+ "875": {
524
+ "content": "<extra_id_49>",
525
+ "lstrip": false,
526
+ "normalized": false,
527
+ "rstrip": false,
528
+ "single_word": false,
529
+ "special": true
530
+ },
531
+ "876": {
532
+ "content": "<extra_id_50>",
533
+ "lstrip": false,
534
+ "normalized": false,
535
+ "rstrip": false,
536
+ "single_word": false,
537
+ "special": true
538
+ },
539
+ "877": {
540
+ "content": "<extra_id_51>",
541
+ "lstrip": false,
542
+ "normalized": false,
543
+ "rstrip": false,
544
+ "single_word": false,
545
+ "special": true
546
+ },
547
+ "878": {
548
+ "content": "<extra_id_52>",
549
+ "lstrip": false,
550
+ "normalized": false,
551
+ "rstrip": false,
552
+ "single_word": false,
553
+ "special": true
554
+ },
555
+ "879": {
556
+ "content": "<extra_id_53>",
557
+ "lstrip": false,
558
+ "normalized": false,
559
+ "rstrip": false,
560
+ "single_word": false,
561
+ "special": true
562
+ },
563
+ "880": {
564
+ "content": "<extra_id_54>",
565
+ "lstrip": false,
566
+ "normalized": false,
567
+ "rstrip": false,
568
+ "single_word": false,
569
+ "special": true
570
+ },
571
+ "881": {
572
+ "content": "<extra_id_55>",
573
+ "lstrip": false,
574
+ "normalized": false,
575
+ "rstrip": false,
576
+ "single_word": false,
577
+ "special": true
578
+ },
579
+ "882": {
580
+ "content": "<extra_id_56>",
581
+ "lstrip": false,
582
+ "normalized": false,
583
+ "rstrip": false,
584
+ "single_word": false,
585
+ "special": true
586
+ },
587
+ "883": {
588
+ "content": "<extra_id_57>",
589
+ "lstrip": false,
590
+ "normalized": false,
591
+ "rstrip": false,
592
+ "single_word": false,
593
+ "special": true
594
+ },
595
+ "884": {
596
+ "content": "<extra_id_58>",
597
+ "lstrip": false,
598
+ "normalized": false,
599
+ "rstrip": false,
600
+ "single_word": false,
601
+ "special": true
602
+ },
603
+ "885": {
604
+ "content": "<extra_id_59>",
605
+ "lstrip": false,
606
+ "normalized": false,
607
+ "rstrip": false,
608
+ "single_word": false,
609
+ "special": true
610
+ },
611
+ "886": {
612
+ "content": "<extra_id_60>",
613
+ "lstrip": false,
614
+ "normalized": false,
615
+ "rstrip": false,
616
+ "single_word": false,
617
+ "special": true
618
+ },
619
+ "887": {
620
+ "content": "<extra_id_61>",
621
+ "lstrip": false,
622
+ "normalized": false,
623
+ "rstrip": false,
624
+ "single_word": false,
625
+ "special": true
626
+ },
627
+ "888": {
628
+ "content": "<extra_id_62>",
629
+ "lstrip": false,
630
+ "normalized": false,
631
+ "rstrip": false,
632
+ "single_word": false,
633
+ "special": true
634
+ },
635
+ "889": {
636
+ "content": "<extra_id_63>",
637
+ "lstrip": false,
638
+ "normalized": false,
639
+ "rstrip": false,
640
+ "single_word": false,
641
+ "special": true
642
+ },
643
+ "890": {
644
+ "content": "<extra_id_64>",
645
+ "lstrip": false,
646
+ "normalized": false,
647
+ "rstrip": false,
648
+ "single_word": false,
649
+ "special": true
650
+ },
651
+ "891": {
652
+ "content": "<extra_id_65>",
653
+ "lstrip": false,
654
+ "normalized": false,
655
+ "rstrip": false,
656
+ "single_word": false,
657
+ "special": true
658
+ },
659
+ "892": {
660
+ "content": "<extra_id_66>",
661
+ "lstrip": false,
662
+ "normalized": false,
663
+ "rstrip": false,
664
+ "single_word": false,
665
+ "special": true
666
+ },
667
+ "893": {
668
+ "content": "<extra_id_67>",
669
+ "lstrip": false,
670
+ "normalized": false,
671
+ "rstrip": false,
672
+ "single_word": false,
673
+ "special": true
674
+ },
675
+ "894": {
676
+ "content": "<extra_id_68>",
677
+ "lstrip": false,
678
+ "normalized": false,
679
+ "rstrip": false,
680
+ "single_word": false,
681
+ "special": true
682
+ },
683
+ "895": {
684
+ "content": "<extra_id_69>",
685
+ "lstrip": false,
686
+ "normalized": false,
687
+ "rstrip": false,
688
+ "single_word": false,
689
+ "special": true
690
+ },
691
+ "896": {
692
+ "content": "<extra_id_70>",
693
+ "lstrip": false,
694
+ "normalized": false,
695
+ "rstrip": false,
696
+ "single_word": false,
697
+ "special": true
698
+ },
699
+ "897": {
700
+ "content": "<extra_id_71>",
701
+ "lstrip": false,
702
+ "normalized": false,
703
+ "rstrip": false,
704
+ "single_word": false,
705
+ "special": true
706
+ },
707
+ "898": {
708
+ "content": "<extra_id_72>",
709
+ "lstrip": false,
710
+ "normalized": false,
711
+ "rstrip": false,
712
+ "single_word": false,
713
+ "special": true
714
+ },
715
+ "899": {
716
+ "content": "<extra_id_73>",
717
+ "lstrip": false,
718
+ "normalized": false,
719
+ "rstrip": false,
720
+ "single_word": false,
721
+ "special": true
722
+ },
723
+ "900": {
724
+ "content": "<extra_id_74>",
725
+ "lstrip": false,
726
+ "normalized": false,
727
+ "rstrip": false,
728
+ "single_word": false,
729
+ "special": true
730
+ },
731
+ "901": {
732
+ "content": "<extra_id_75>",
733
+ "lstrip": false,
734
+ "normalized": false,
735
+ "rstrip": false,
736
+ "single_word": false,
737
+ "special": true
738
+ },
739
+ "902": {
740
+ "content": "<extra_id_76>",
741
+ "lstrip": false,
742
+ "normalized": false,
743
+ "rstrip": false,
744
+ "single_word": false,
745
+ "special": true
746
+ },
747
+ "903": {
748
+ "content": "<extra_id_77>",
749
+ "lstrip": false,
750
+ "normalized": false,
751
+ "rstrip": false,
752
+ "single_word": false,
753
+ "special": true
754
+ },
755
+ "904": {
756
+ "content": "<extra_id_78>",
757
+ "lstrip": false,
758
+ "normalized": false,
759
+ "rstrip": false,
760
+ "single_word": false,
761
+ "special": true
762
+ },
763
+ "905": {
764
+ "content": "<extra_id_79>",
765
+ "lstrip": false,
766
+ "normalized": false,
767
+ "rstrip": false,
768
+ "single_word": false,
769
+ "special": true
770
+ },
771
+ "906": {
772
+ "content": "<extra_id_80>",
773
+ "lstrip": false,
774
+ "normalized": false,
775
+ "rstrip": false,
776
+ "single_word": false,
777
+ "special": true
778
+ },
779
+ "907": {
780
+ "content": "<extra_id_81>",
781
+ "lstrip": false,
782
+ "normalized": false,
783
+ "rstrip": false,
784
+ "single_word": false,
785
+ "special": true
786
+ },
787
+ "908": {
788
+ "content": "<extra_id_82>",
789
+ "lstrip": false,
790
+ "normalized": false,
791
+ "rstrip": false,
792
+ "single_word": false,
793
+ "special": true
794
+ },
795
+ "909": {
796
+ "content": "<extra_id_83>",
797
+ "lstrip": false,
798
+ "normalized": false,
799
+ "rstrip": false,
800
+ "single_word": false,
801
+ "special": true
802
+ },
803
+ "910": {
804
+ "content": "<extra_id_84>",
805
+ "lstrip": false,
806
+ "normalized": false,
807
+ "rstrip": false,
808
+ "single_word": false,
809
+ "special": true
810
+ },
811
+ "911": {
812
+ "content": "<extra_id_85>",
813
+ "lstrip": false,
814
+ "normalized": false,
815
+ "rstrip": false,
816
+ "single_word": false,
817
+ "special": true
818
+ },
819
+ "912": {
820
+ "content": "<extra_id_86>",
821
+ "lstrip": false,
822
+ "normalized": false,
823
+ "rstrip": false,
824
+ "single_word": false,
825
+ "special": true
826
+ },
827
+ "913": {
828
+ "content": "<extra_id_87>",
829
+ "lstrip": false,
830
+ "normalized": false,
831
+ "rstrip": false,
832
+ "single_word": false,
833
+ "special": true
834
+ },
835
+ "914": {
836
+ "content": "<extra_id_88>",
837
+ "lstrip": false,
838
+ "normalized": false,
839
+ "rstrip": false,
840
+ "single_word": false,
841
+ "special": true
842
+ },
843
+ "915": {
844
+ "content": "<extra_id_89>",
845
+ "lstrip": false,
846
+ "normalized": false,
847
+ "rstrip": false,
848
+ "single_word": false,
849
+ "special": true
850
+ },
851
+ "916": {
852
+ "content": "<extra_id_90>",
853
+ "lstrip": false,
854
+ "normalized": false,
855
+ "rstrip": false,
856
+ "single_word": false,
857
+ "special": true
858
+ },
859
+ "917": {
860
+ "content": "<extra_id_91>",
861
+ "lstrip": false,
862
+ "normalized": false,
863
+ "rstrip": false,
864
+ "single_word": false,
865
+ "special": true
866
+ },
867
+ "918": {
868
+ "content": "<extra_id_92>",
869
+ "lstrip": false,
870
+ "normalized": false,
871
+ "rstrip": false,
872
+ "single_word": false,
873
+ "special": true
874
+ },
875
+ "919": {
876
+ "content": "<extra_id_93>",
877
+ "lstrip": false,
878
+ "normalized": false,
879
+ "rstrip": false,
880
+ "single_word": false,
881
+ "special": true
882
+ },
883
+ "920": {
884
+ "content": "<extra_id_94>",
885
+ "lstrip": false,
886
+ "normalized": false,
887
+ "rstrip": false,
888
+ "single_word": false,
889
+ "special": true
890
+ },
891
+ "921": {
892
+ "content": "<extra_id_95>",
893
+ "lstrip": false,
894
+ "normalized": false,
895
+ "rstrip": false,
896
+ "single_word": false,
897
+ "special": true
898
+ },
899
+ "922": {
900
+ "content": "<extra_id_96>",
901
+ "lstrip": false,
902
+ "normalized": false,
903
+ "rstrip": false,
904
+ "single_word": false,
905
+ "special": true
906
+ },
907
+ "923": {
908
+ "content": "<extra_id_97>",
909
+ "lstrip": false,
910
+ "normalized": false,
911
+ "rstrip": false,
912
+ "single_word": false,
913
+ "special": true
914
+ },
915
+ "924": {
916
+ "content": "<extra_id_98>",
917
+ "lstrip": false,
918
+ "normalized": false,
919
+ "rstrip": false,
920
+ "single_word": false,
921
+ "special": true
922
+ },
923
+ "925": {
924
+ "content": "<extra_id_99>",
925
+ "lstrip": false,
926
+ "normalized": false,
927
+ "rstrip": false,
928
+ "single_word": false,
929
+ "special": true
930
+ }
931
+ },
932
+ "additional_special_tokens": [
933
+ "<arb>",
934
+ "<asm>",
935
+ "<ben>",
936
+ "<guj>",
937
+ "<hin>",
938
+ "<kan>",
939
+ "<mal>",
940
+ "<mar>",
941
+ "<ori>",
942
+ "<pan>",
943
+ "<tam>",
944
+ "<tel>",
945
+ "<extra_id_0>",
946
+ "<extra_id_1>",
947
+ "<extra_id_2>",
948
+ "<extra_id_3>",
949
+ "<extra_id_4>",
950
+ "<extra_id_5>",
951
+ "<extra_id_6>",
952
+ "<extra_id_7>",
953
+ "<extra_id_8>",
954
+ "<extra_id_9>",
955
+ "<extra_id_10>",
956
+ "<extra_id_11>",
957
+ "<extra_id_12>",
958
+ "<extra_id_13>",
959
+ "<extra_id_14>",
960
+ "<extra_id_15>",
961
+ "<extra_id_16>",
962
+ "<extra_id_17>",
963
+ "<extra_id_18>",
964
+ "<extra_id_19>",
965
+ "<extra_id_20>",
966
+ "<extra_id_21>",
967
+ "<extra_id_22>",
968
+ "<extra_id_23>",
969
+ "<extra_id_24>",
970
+ "<extra_id_25>",
971
+ "<extra_id_26>",
972
+ "<extra_id_27>",
973
+ "<extra_id_28>",
974
+ "<extra_id_29>",
975
+ "<extra_id_30>",
976
+ "<extra_id_31>",
977
+ "<extra_id_32>",
978
+ "<extra_id_33>",
979
+ "<extra_id_34>",
980
+ "<extra_id_35>",
981
+ "<extra_id_36>",
982
+ "<extra_id_37>",
983
+ "<extra_id_38>",
984
+ "<extra_id_39>",
985
+ "<extra_id_40>",
986
+ "<extra_id_41>",
987
+ "<extra_id_42>",
988
+ "<extra_id_43>",
989
+ "<extra_id_44>",
990
+ "<extra_id_45>",
991
+ "<extra_id_46>",
992
+ "<extra_id_47>",
993
+ "<extra_id_48>",
994
+ "<extra_id_49>",
995
+ "<extra_id_50>",
996
+ "<extra_id_51>",
997
+ "<extra_id_52>",
998
+ "<extra_id_53>",
999
+ "<extra_id_54>",
1000
+ "<extra_id_55>",
1001
+ "<extra_id_56>",
1002
+ "<extra_id_57>",
1003
+ "<extra_id_58>",
1004
+ "<extra_id_59>",
1005
+ "<extra_id_60>",
1006
+ "<extra_id_61>",
1007
+ "<extra_id_62>",
1008
+ "<extra_id_63>",
1009
+ "<extra_id_64>",
1010
+ "<extra_id_65>",
1011
+ "<extra_id_66>",
1012
+ "<extra_id_67>",
1013
+ "<extra_id_68>",
1014
+ "<extra_id_69>",
1015
+ "<extra_id_70>",
1016
+ "<extra_id_71>",
1017
+ "<extra_id_72>",
1018
+ "<extra_id_73>",
1019
+ "<extra_id_74>",
1020
+ "<extra_id_75>",
1021
+ "<extra_id_76>",
1022
+ "<extra_id_77>",
1023
+ "<extra_id_78>",
1024
+ "<extra_id_79>",
1025
+ "<extra_id_80>",
1026
+ "<extra_id_81>",
1027
+ "<extra_id_82>",
1028
+ "<extra_id_83>",
1029
+ "<extra_id_84>",
1030
+ "<extra_id_85>",
1031
+ "<extra_id_86>",
1032
+ "<extra_id_87>",
1033
+ "<extra_id_88>",
1034
+ "<extra_id_89>",
1035
+ "<extra_id_90>",
1036
+ "<extra_id_91>",
1037
+ "<extra_id_92>",
1038
+ "<extra_id_93>",
1039
+ "<extra_id_94>",
1040
+ "<extra_id_95>",
1041
+ "<extra_id_96>",
1042
+ "<extra_id_97>",
1043
+ "<extra_id_98>",
1044
+ "<extra_id_99>"
1045
+ ],
1046
+ "bos_token": "<s>",
1047
+ "clean_up_tokenization_spaces": false,
1048
+ "eos_token": "</s>",
1049
+ "extra_ids": 100,
1050
+ "model_max_length": 1000000000000000019884624838656,
1051
+ "pad_token": "<pad>",
1052
+ "tokenizer_class": "T5Tokenizer",
1053
+ "unk_token": "<unk>"
1054
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aaaf75165f97986b66f80749f20de988efe4be07e385bfe7a8219ef19d9ee064
3
+ size 4015