Spaces:
Running
Running
jhj0517
committed on
Commit
·
79d567a
1
Parent(s):
3b27598
Add lrc word aligning feature
Browse files
modules/utils/subtitle_manager.py
CHANGED
@@ -107,12 +107,14 @@ class SubtitlesWriter(ResultWriter):
|
|
107 |
max_line_width: Optional[int] = None,
|
108 |
max_line_count: Optional[int] = None,
|
109 |
highlight_words: bool = False,
|
|
|
110 |
max_words_per_line: Optional[int] = None,
|
111 |
):
|
112 |
options = options or {}
|
113 |
max_line_width = max_line_width or options.get("max_line_width")
|
114 |
max_line_count = max_line_count or options.get("max_line_count")
|
115 |
highlight_words = highlight_words or options.get("highlight_words", False)
|
|
|
116 |
max_words_per_line = max_words_per_line or options.get("max_words_per_line")
|
117 |
preserve_segments = max_line_count is None or max_line_width is None
|
118 |
max_line_width = max_line_width or 1000
|
@@ -195,6 +197,14 @@ class SubtitlesWriter(ResultWriter):
|
|
195 |
]
|
196 |
)
|
197 |
last = end
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
198 |
else:
|
199 |
yield subtitle_start, subtitle_end, subtitle_text
|
200 |
else:
|
@@ -291,7 +301,10 @@ class WriteLRC(SubtitlesWriter):
|
|
291 |
for i, (start, end, text) in enumerate(
|
292 |
self.iterate_result(result, options, **kwargs), start=1
|
293 |
):
|
294 |
-
|
|
|
|
|
|
|
295 |
|
296 |
def to_segments(self, file_path: str) -> List[Segment]:
|
297 |
segments = []
|
@@ -387,6 +400,10 @@ def generate_file(
|
|
387 |
|
388 |
file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
|
389 |
file_writer = get_writer(output_format=output_format, output_dir=output_dir)
|
|
|
|
|
|
|
|
|
390 |
file_writer(result=result, output_file_name=output_file_name, **kwargs)
|
391 |
content = read_file(file_path)
|
392 |
return content, file_path
|
|
|
107 |
max_line_width: Optional[int] = None,
|
108 |
max_line_count: Optional[int] = None,
|
109 |
highlight_words: bool = False,
|
110 |
+
align_lrc_words: bool = False,
|
111 |
max_words_per_line: Optional[int] = None,
|
112 |
):
|
113 |
options = options or {}
|
114 |
max_line_width = max_line_width or options.get("max_line_width")
|
115 |
max_line_count = max_line_count or options.get("max_line_count")
|
116 |
highlight_words = highlight_words or options.get("highlight_words", False)
|
117 |
+
align_lrc_words = align_lrc_words or options.get("align_lrc_words", False)
|
118 |
max_words_per_line = max_words_per_line or options.get("max_words_per_line")
|
119 |
preserve_segments = max_line_count is None or max_line_width is None
|
120 |
max_line_width = max_line_width or 1000
|
|
|
197 |
]
|
198 |
)
|
199 |
last = end
|
200 |
+
|
201 |
+
if align_lrc_words:
|
202 |
+
lrc_aligned_words = [f"[{self.format_timestamp(sub['start'])}]{sub['word']}" for sub in subtitle]
|
203 |
+
l_start, l_end = self.format_timestamp(subtitle[-1]['start']), self.format_timestamp(subtitle[-1]['end'])
|
204 |
+
lrc_aligned_words[-1] = f"[{l_start}]{subtitle[-1]['word']}[{l_end}]"
|
205 |
+
lrc_aligned_words = ' '.join(lrc_aligned_words)
|
206 |
+
yield None, None, lrc_aligned_words
|
207 |
+
|
208 |
else:
|
209 |
yield subtitle_start, subtitle_end, subtitle_text
|
210 |
else:
|
|
|
301 |
for i, (start, end, text) in enumerate(
|
302 |
self.iterate_result(result, options, **kwargs), start=1
|
303 |
):
|
304 |
+
if "align_lrc_words" in kwargs and kwargs["align_lrc_words"]:
|
305 |
+
print(f"{text}\n", file=file, flush=True)
|
306 |
+
else:
|
307 |
+
print(f"[{start}]{text}[{end}]\n", file=file, flush=True)
|
308 |
|
309 |
def to_segments(self, file_path: str) -> List[Segment]:
|
310 |
segments = []
|
|
|
400 |
|
401 |
file_path = os.path.join(output_dir, f"{output_file_name}.{output_format}")
|
402 |
file_writer = get_writer(output_format=output_format, output_dir=output_dir)
|
403 |
+
|
404 |
+
if isinstance(file_writer, WriteLRC) and kwargs["highlight_words"]:
|
405 |
+
kwargs["highlight_words"], kwargs["align_lrc_words"] = False, True
|
406 |
+
|
407 |
file_writer(result=result, output_file_name=output_file_name, **kwargs)
|
408 |
content = read_file(file_path)
|
409 |
return content, file_path
|