k4d3
/

toolkit

Model card Files Files and versions Community

toolkit / pick_caption

k4d3's picture

there were questionable decisions made here..

6b5f1f8 27 days ago

963 Bytes

	#!/usr/bin/env python

	import os
	import re

	def process_caption_files(directory="."):
	    """Trim each ``.caption`` file in *directory* to the text before its separator.

	    A file is rewritten only when its text contains a line consisting of
	    exactly ten dashes that is preceded and followed by a newline. The file
	    is then overwritten with the lines that come before the first line
	    containing ten consecutive dashes. Files without such a separator, and
	    entries that are not regular files, are left untouched.

	    Args:
	        directory: Folder to scan (not recursive). Defaults to the current
	            working directory, which is what the script always used.

	    Raises:
	        OSError: If the directory cannot be listed or a file cannot be
	            read or written.
	        UnicodeDecodeError: If a caption file is not valid UTF-8.
	    """
	    separator = "----------"

	    for name in os.listdir(directory):
	        if not name.endswith(".caption"):
	            continue

	        path = os.path.join(directory, name)
	        # A directory (or other non-file entry) named "*.caption" cannot be
	        # opened as text; skip it instead of crashing the whole run.
	        if not os.path.isfile(path):
	            continue

	        # Explicit encoding so the result does not depend on the platform's
	        # default locale encoding.
	        with open(path, "r", encoding="utf-8") as f:
	            lines = f.readlines()

	        # No stand-alone separator line: nothing to trim (this also covers
	        # files that were already processed by an earlier run).
	        if "\n" + separator + "\n" not in "".join(lines):
	            continue

	        # Index of the first line containing the dash run. The guard above
	        # guarantees at least one such line exists, so next() cannot raise.
	        cut = next(i for i, line in enumerate(lines) if separator in line)
	        content = "".join(lines[:cut])

	        # NOTE(review): the `\|` below is an escaped, literal pipe, so this
	        # pattern only removes a run of dashes immediately followed by "|"
	        # and a newline. It does not strip bare newlines or dash runs.
	        # Kept byte-for-byte to preserve the written output; confirm whether
	        # an alternation was intended before changing it (an alternation
	        # would also delete hyphens inside the caption text).
	        processed_content = re.sub(r'[\-]+\|\n', '', content)

	        # Save the trimmed caption back to the same file.
	        with open(path, "w", encoding="utf-8") as f:
	            f.write(processed_content)

	# Run only when executed as a script, so importing this module does not
	# rewrite caption files in the current working directory as a side effect.
	if __name__ == "__main__":
	    process_caption_files()