mrfakename committed on
Commit
03572f6
•
1 Parent(s): b8401e1

Enhancements

Browse files
Files changed (1) hide show
  1. melo/split_utils.py +1 -1
melo/split_utils.py CHANGED
@@ -18,7 +18,7 @@ def split_sentences_latin(text, min_len=10):
18
  text = re.sub('[“”]', '"', text)
19
  text = re.sub('[‘’]', "'", text)
20
  text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
21
- return txtsplit(text, 512, 512)
22
  # 将文本中的换行符、空格和制表符替换为空格
23
  # text = re.sub('[\n\t ]+', ' ', text)
24
  # # 在标点符号后添加一个空格
 
18
  text = re.sub('[“”]', '"', text)
19
  text = re.sub('[‘’]', "'", text)
20
  text = re.sub(r"[\<\>\(\)\[\]\"\«\»]+", "", text)
21
+ return [item.strip() for item in txtsplit(text, 512, 512) if item.strip()]
22
  # 将文本中的换行符、空格和制表符替换为空格
23
  # text = re.sub('[\n\t ]+', ' ', text)
24
  # # 在标点符号后添加一个空格