|
#!/usr/bin/env python |
|
# -*- coding: utf-8 -*- |
|
|
|
"" |
|
This script collapses every run of repeated spaces or newline characters down
to a single character in all *.caption and *.txt files found recursively under
a target directory. If no target directory is given as an argument, the
current working directory is processed.

Usage:
    python script_name.py [target_directory]

Args:
    target_directory (str, optional): The path to the target directory.
        If not provided, the current directory is used.
|
"" |
|
|
|
import os |
|
import sys |
|
import glob |
|
|
|
def remove_extra_spaces_and_newlines(file_path):
    """
    Collapse redundant whitespace in a text file, rewriting it in place.

    Within each line, every run of whitespace is reduced to a single space
    and leading/trailing whitespace is dropped. Runs of consecutive line
    breaks (including lines that hold only whitespace) are reduced to a
    single newline character. No non-whitespace text is removed, and the
    result carries no leading or trailing newline.

    Args:
        file_path (str): The path to the file to be processed. The file is
            read and written as UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Normalise one line at a time. Calling content.split() on the whole text
    # would also swallow the line breaks and flatten the file onto one line.
    normalized_lines = (' '.join(line.split()) for line in content.split('\n'))

    # A line that is empty after normalisation was blank or whitespace-only;
    # dropping it is what collapses consecutive newlines into a single one.
    content = '\n'.join(line for line in normalized_lines if line)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)
|
|
|
def process_files_in_directory(directory):
    """
    Clean every *.caption and *.txt file found under a directory tree.

    The tree rooted at ``directory`` is searched recursively. All *.caption
    files are handled first, then all *.txt files; each one is passed to
    remove_extra_spaces_and_newlines, which rewrites it in place.

    Args:
        directory (str): The path to the directory to be processed. It is
            treated as a literal path, never as a glob pattern.
    """
    # Escape the root so characters such as "[", "*" or "?" in the directory
    # name are matched literally instead of being read as wildcards.
    root = glob.escape(directory)

    for extension in ('caption', 'txt'):
        pattern = os.path.join(root, '**', '*.' + extension)
        for file_path in glob.glob(pattern, recursive=True):
            # glob matches on names only, so a directory called e.g.
            # "notes.txt" is returned as well; trying to open it would raise.
            if os.path.isfile(file_path):
                remove_extra_spaces_and_newlines(file_path)
|
|
|
if __name__ == "__main__":
    # The first command-line argument, when present, names the root directory
    # to clean; otherwise the current working directory is used.
    cli_args = sys.argv[1:]
    target_directory = cli_args[0] if cli_args else os.getcwd()

    process_files_in_directory(target_directory)
|
|
|
|