Upload tokenizer

Files changed (4) hide show

added_tokens.json CHANGED Viewed

@@ -1595,6 +1595,7 @@
   "<|th|>": 50289,
   "<|tk|>": 50341,
   "<|tl|>": 50348,
   "<|transcribe|>": 50359,
   "<|translate|>": 50358,
   "<|tr|>": 50268,

   "<|th|>": 50289,
   "<|tk|>": 50341,
   "<|tl|>": 50348,
+  "<|transcribeprecise|>": 51865,
   "<|transcribe|>": 50359,
   "<|translate|>": 50358,
   "<|tr|>": 50268,

tokenizer.json CHANGED Viewed

@@ -14474,6 +14474,15 @@
       "rstrip": false,
       "normalized": true,
       "special": false
     }
   ],
   "normalizer": null,

       "rstrip": false,
       "normalized": true,
       "special": false
+    },
+    {
+      "id": 51865,
+      "content": "<|transcribeprecise|>",
+      "single_word": false,
+      "lstrip": false,
+      "rstrip": false,
+      "normalized": true,
+      "special": false
     }
   ],
   "normalizer": null,

tokenizer_config.json CHANGED Viewed

@@ -12865,6 +12865,14 @@
       "rstrip": false,
       "single_word": false,
       "special": false
     }
   },
   "additional_special_tokens": [
@@ -12980,6 +12988,7 @@
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
   "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",

       "rstrip": false,
       "single_word": false,
       "special": false
+    },
+    "51865": {
+      "content": "<|transcribeprecise|>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": false
     }
   },
   "additional_special_tokens": [
   "clean_up_tokenization_spaces": true,
   "eos_token": "<|endoftext|>",
   "errors": "replace",
+  "extra_special_tokens": {},
   "model_max_length": 1024,
   "pad_token": "<|endoftext|>",
   "processor_class": "WhisperProcessor",

vocab.json CHANGED Viewed

The diff for this file is too large to render. See raw diff