zhuyunfeng committed on
Commit
6ae6ba3
·
unverified ·
1 Parent(s): d1df605

Update frontend.py

Browse files

Fix a bug where text to be synthesized was handled abnormally when it ended with a Chinese or English comma.

Files changed (1) hide show
  1. cosyvoice/cli/frontend.py +2 -0
cosyvoice/cli/frontend.py CHANGED
@@ -20,6 +20,7 @@ from typing import Callable
20
  import torchaudio.compliance.kaldi as kaldi
21
  import torchaudio
22
  import os
 
23
  import inflect
24
  try:
25
  import ttsfrd
@@ -110,6 +111,7 @@ class CosyVoiceFrontEnd:
110
  text = text.replace(".", "、")
111
  text = text.replace(" - ", ",")
112
  text = remove_bracket(text)
 
113
  texts = [i for i in split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "zh", token_max_n=80,
114
  token_min_n=60, merge_len=20,
115
  comma_split=False)]
 
20
  import torchaudio.compliance.kaldi as kaldi
21
  import torchaudio
22
  import os
23
+ import re
24
  import inflect
25
  try:
26
  import ttsfrd
 
111
  text = text.replace(".", "、")
112
  text = text.replace(" - ", ",")
113
  text = remove_bracket(text)
114
+ text = re.sub(r'[,,]+$', '。', text)
115
  texts = [i for i in split_paragraph(text, partial(self.tokenizer.encode, allowed_special=self.allowed_special), "zh", token_max_n=80,
116
  token_min_n=60, merge_len=20,
117
  comma_split=False)]