#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""Collapse redundant whitespace in *.caption and *.txt files.

Every run of whitespace inside a line is reduced to a single space, and
every run of consecutive newlines (including lines that contain only
whitespace) is reduced to a single newline. Leading and trailing
whitespace of the file is removed. No non-whitespace text is removed.

The target directory is processed recursively. If no target directory is
given on the command line, the current working directory is used.

Usage:
    python script_name.py [target_directory]

Args:
    target_directory (str, optional): The path to the target directory.
        If not provided, the current directory is used.
"""

import os
import sys
import glob

# File name patterns (relative to each visited directory) that get cleaned.
FILE_PATTERNS = ('*.caption', '*.txt')


def _normalize_whitespace(text):
    """Return *text* with whitespace runs collapsed, keeping line breaks.

    Each line is reduced to its words joined by single spaces; lines that
    end up empty are dropped so consecutive newlines become one newline.
    """
    # Normalize per line: splitting the whole text on whitespace would also
    # swallow the newlines and flatten the file into a single line.
    collapsed = (' '.join(line.split()) for line in text.split('\n'))
    return '\n'.join(line for line in collapsed if line)


def remove_extra_spaces_and_newlines(file_path):
    """Rewrite the file at *file_path* with redundant whitespace collapsed.

    Runs of spaces/tabs within a line become a single space, and runs of
    newlines become a single newline. Leading and trailing whitespace is
    stripped. The file is read and written as UTF-8, and is left untouched
    when it is already normalized.

    Args:
        file_path (str): The path to the file to be processed.

    Raises:
        OSError: If the file cannot be read or written.
        UnicodeDecodeError: If the file is not valid UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    cleaned = _normalize_whitespace(content)

    # Skip the write when nothing changed so untouched files keep their
    # modification time.
    if cleaned == content:
        return

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(cleaned)


def process_files_in_directory(directory):
    """Clean every *.caption and *.txt file under *directory*, recursively.

    Args:
        directory (str): The path to the directory to be processed.
    """
    # Escape the directory so glob metacharacters in its name (e.g. '[' or
    # '*') are matched literally instead of being treated as a pattern.
    root = glob.escape(directory)
    for pattern in FILE_PATTERNS:
        for file_path in glob.glob(os.path.join(root, '**', pattern),
                                   recursive=True):
            # A directory may itself be named like "something.txt".
            if os.path.isfile(file_path):
                remove_extra_spaces_and_newlines(file_path)


if __name__ == "__main__":
    if len(sys.argv) > 1:
        target_directory = sys.argv[1]
    else:
        target_directory = os.getcwd()

    if not os.path.isdir(target_directory):
        sys.exit("Error: '{}' is not a directory.".format(target_directory))

    process_files_in_directory(target_directory)