qirimtatar-tts / tests /test_converter.py
Yurii Paniv
Add converter to latin
8d24ba9
raw
history blame
2.39 kB
import converter
def test_cyrillic_converter():
    """
    Check that each Latin sample converts to its expected Cyrillic form.

    Every row of the test dataset holds the Latin text in column 0 and
    the Cyrillic text in column 1.
    """
    for row in _read_test_cases():
        converted = converter.to_cyrillic(row[0])
        assert converted == row[1]
def test_latin_converter():
    """
    Check that each Cyrillic sample converts to its expected Latin form.

    Every row of the test dataset holds the Latin text in column 0 and
    the Cyrillic text in column 1. Both sides are lower-cased before the
    comparison, so letter case differences do not fail the test.
    """
    for case in _read_test_cases():
        converted = converter.to_latin(case[1])
        assert converted.lower() == case[0].lower()
def test_letter_coverage():
    """
    Check if all letters are present in a test set.

    Both alphabets are checked against the lower-cased dataset: the Latin
    letters against column 0 of every row and the Cyrillic letters
    (including the digraphs "гъ", "къ", "нъ" and "дж") against column 1.
    The test fails with the list of letters that never occur.
    """
    latin_alphabet = [
        "a",
        "â",
        "b",
        "c",
        "ç",
        "d",
        "e",
        "f",
        "g",
        "ğ",
        "h",
        "ı",
        "i",
        "j",
        "k",
        "l",
        "m",
        "n",
        "ñ",
        "o",
        "ö",
        "p",
        "q",
        "r",
        "s",
        "ş",
        "t",
        "u",
        "ü",
        "v",
        "y",
        "z",
    ]
    cyrillic_alphabet = [
        "а",
        "б",
        "в",
        "г",
        "гъ",
        "д",
        "е",
        "ё",
        "ж",
        "з",
        "и",
        "й",
        "к",
        "къ",
        "л",
        "м",
        "н",
        "нъ",
        "о",
        "п",
        "р",
        "с",
        "т",
        "у",
        "ф",
        "х",
        "ц",
        "ч",
        "дж",
        "ш",
        "щ",
        "ъ",
        "ы",
        "ь",
        "э",
        "ю",
        "я",
    ]
    cases = _read_test_cases()
    latin_text = " ".join(case[0] for case in cases).lower()
    cyrillic_text = " ".join(case[1] for case in cases).lower()

    missing_letters = _find_missing_letters(latin_alphabet, latin_text)
    missing_letters += _find_missing_letters(cyrillic_alphabet, cyrillic_text)

    assert not missing_letters, f"'{missing_letters}' not found in test dataset!"


def _find_missing_letters(alphabet, text):
    """
    Return the letters of ``alphabet`` that do not occur in ``text``.

    Multi-character letters are looked up first and every letter is
    blanked out once it has been checked, so the characters of a digraph
    (e.g. "гъ") cannot also count as occurrences of its single-character
    parts ("г", "ъ").
    """
    missing = []
    # Stable sort: letters of equal length keep their alphabet order.
    for letter in sorted(alphabet, key=len, reverse=True):
        if letter not in text:
            missing.append(letter)
        # Blank out with a space instead of deleting: deleting would glue
        # the neighbouring characters together and could fabricate a
        # digraph that never appeared in the data ("дгъж" -> "дж").
        text = text.replace(letter, " ")
    return missing
def _read_test_cases(path="tests/rosetta.csv"):
    """
    Load the conversion test pairs from a "|"-separated text file.

    Each non-blank line becomes one row: the list of its "|"-separated
    fields. The tests in this module read field 0 as the Latin text and
    field 1 as the Cyrillic text.

    The file is decoded as UTF-8 explicitly, because the data is
    non-ASCII and the platform default encoding may not be able to decode
    it (assumes the dataset is stored as UTF-8). Blank lines, including
    the empty one produced by a trailing newline, are skipped so they do
    not turn into one-field rows that break ``case[1]`` lookups.

    :param path: location of the dataset; defaults to the repository's
        ``tests/rosetta.csv`` relative to the working directory.
    :return: list of rows, each row being a list of strings.
    """
    rows = []
    with open(path, encoding="utf-8") as file:
        for line in file:
            line = line.rstrip("\n")
            if not line.strip():
                continue
            rows.append(line.split("|"))
    return rows