FAYO
model
1ef9436
raw
history blame
714 Bytes
SYMBOL_SPLITS = {
"。",
"?",
"!",
"……",
".",
"?",
"!",
"~",
"…",
}
def make_text_chunk(original_text, strat_index, max_len=5, max_try=5000):
cut_string = original_text
end_index = strat_index
while True:
if original_text[end_index] in SYMBOL_SPLITS:
end_index += 1
cut_string = original_text[strat_index:end_index]
break
else:
end_index += 1
if end_index >= len(original_text):
# 文本太短,没找到
return 0, ""
if end_index > max_try:
# 有问题
raise ValueError("Reach max try")
return end_index, cut_string