AlexN committed on
Commit
2d73c3c
1 Parent(s): bea3b75
Files changed (1) hide show
  1. run_speech_recognition_ctc.py +4 -6
run_speech_recognition_ctc.py CHANGED
@@ -511,7 +511,6 @@ def main():
511
  tokenizer_kwargs = {
512
  "config": config if config.tokenizer_class is not None else None,
513
  "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
514
- "bos_token": "<s>",
515
  "unk_token": unk_token,
516
  "pad_token": pad_token,
517
  "word_delimiter_token": word_delimiter_token,
@@ -522,11 +521,10 @@ def main():
522
  # one local process can concurrently download model & vocab.
523
 
524
  # load feature_extractor and tokenizer
525
- tokenizer = AutoTokenizer.from_pretrained(
526
- tokenizer_name_or_path,
527
- use_auth_token=data_args.use_auth_token,
528
- **tokenizer_kwargs,
529
- )
530
  feature_extractor = AutoFeatureExtractor.from_pretrained(
531
  model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
532
  )
 
511
  tokenizer_kwargs = {
512
  "config": config if config.tokenizer_class is not None else None,
513
  "tokenizer_type": config.model_type if config.tokenizer_class is None else None,
 
514
  "unk_token": unk_token,
515
  "pad_token": pad_token,
516
  "word_delimiter_token": word_delimiter_token,
 
521
  # one local process can concurrently download model & vocab.
522
 
523
  # load feature_extractor and tokenizer
524
+ tokenizer = Wav2Vec2CTCTokenizer(tokenizer_name_or_path,
525
+ use_auth_token=data_args.use_auth_token,
526
+ **tokenizer_kwargs,
527
+ )
 
528
  feature_extractor = AutoFeatureExtractor.from_pretrained(
529
  model_args.model_name_or_path, cache_dir=model_args.cache_dir, use_auth_token=data_args.use_auth_token
530
  )