pits / text /symbols.py
junhyouk lee
hfdemo
b8b70ac
raw
history blame
1.17 kB
_pad = '_'
_punc = ";:,.!?¡¿—-…«»'“”~() "
_jamo_leads = "".join([chr(_) for _ in range(0x1100, 0x1113)])
_jamo_vowels = "".join([chr(_) for _ in range(0x1161, 0x1176)])
_jamo_tails = "".join([chr(_) for _ in range(0x11A8, 0x11C3)])
_kor_characters = _jamo_leads + _jamo_vowels + _jamo_tails
_cmu_characters = [
'AA', 'AE', 'AH',
'AO', 'AW', 'AY',
'B', 'CH', 'D', 'DH', 'EH', 'ER', 'EY',
'F', 'G', 'HH', 'IH', 'IY',
'JH', 'K', 'L', 'M', 'N', 'NG', 'OW', 'OY',
'P', 'R', 'S', 'SH', 'T', 'TH', 'UH', 'UW',
'V', 'W', 'Y', 'Z', 'ZH'
]
lang_to_symbols = {
'common': [_pad] + list(_punc),
'ko_KR': list(_kor_characters),
'en_US': _cmu_characters,
}
def lang_to_dict(lang):
symbol_lang = lang_to_symbols['common'] + lang_to_symbols[lang]
dict_lang = {s: i for i, s in enumerate(symbol_lang)}
return dict_lang
def lang_to_dict_inverse(lang):
symbol_lang = lang_to_symbols['common'] + lang_to_symbols[lang]
dict_lang = {i: s for i, s in enumerate(symbol_lang)}
return dict_lang
def symbol_len(lang):
symbol_lang = lang_to_symbols['common'] + lang_to_symbols[lang]
return len(symbol_lang)