File size: 2,008 Bytes
ee41534 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
Collapse redundant whitespace in caption and text files.

This script reduces every run of extra spaces (more than one) or newline
characters to a single character in all *.caption and *.txt files under a
target directory, searching recursively. If no target directory is provided
as an argument, it processes the current working directory.

Usage:
    python script_name.py [target_directory]

Args:
    target_directory (str, optional): The path to the target directory.
        If not provided, the current directory is used.
"""
import os
import sys
import glob
def remove_extra_spaces_and_newlines(file_path):
    """
    Collapse redundant whitespace in a text file, rewriting it in place.

    Within every line, each run of whitespace is reduced to a single space and
    leading/trailing whitespace is dropped. Lines that end up empty are
    discarded, so any run of consecutive newlines shrinks to exactly one.
    The result carries no leading or trailing newline.

    Args:
        file_path (str): The path to the file to be processed. It is read and
            written back as UTF-8.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Normalise line by line. Calling content.split() on the whole text would
    # treat newlines as ordinary whitespace and merge every line into one.
    squeezed_lines = (' '.join(line.split()) for line in content.split('\n'))

    # Dropping the now-empty lines is what reduces repeated newlines to one.
    content = '\n'.join(line for line in squeezed_lines if line)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)
def process_files_in_directory(directory):
    """
    Clean every *.caption and *.txt file found beneath a directory.

    The directory tree is searched recursively and each matching path is
    handed to remove_extra_spaces_and_newlines.

    Args:
        directory (str): The path to the directory to be processed.
    """
    # Caption files are handled first, then plain-text files.
    for suffix in ('*.caption', '*.txt'):
        pattern = os.path.join(directory, '**', suffix)
        for match in glob.glob(pattern, recursive=True):
            remove_extra_spaces_and_newlines(match)
if __name__ == "__main__":
    # A directory given on the command line takes precedence; with no
    # argument the current working directory is processed.
    target_directory = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()
    process_files_in_directory(target_directory)
|