File size: 2,008 Bytes
ee41534
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This script removes all extra spaces (more than one) and new line characters (truncating to one single character) 
from all *.caption and *.txt files in a target directory recursively. If no target directory is provided as an 
argument, it processes the current directory.

Usage:
    python script_name.py [target_directory]

Args:
    target_directory (str, optional): The path to the target directory. If not provided, the current directory is used.
"""

import os
import sys
import glob

def remove_extra_spaces_and_newlines(file_path):
    """
    Collapses redundant whitespace in the given file, rewriting it in place.

    Within each line, every run of whitespace is reduced to a single space and
    leading/trailing whitespace is dropped. Runs of consecutive newlines
    (including lines that hold only whitespace) are reduced to a single
    newline, so the line structure of the text is kept. No non-whitespace
    text is removed.

    Args:
        file_path (str): The path to the file to be processed.
    """
    with open(file_path, 'r', encoding='utf-8') as file:
        content = file.read()

    # Normalise line by line. Calling split() on the whole content would also
    # swallow the newlines and flatten the file onto a single line.
    collapsed_lines = (' '.join(line.split()) for line in content.split('\n'))

    # Dropping the now-empty lines is what turns a run of newlines into one.
    content = '\n'.join(line for line in collapsed_lines if line)

    with open(file_path, 'w', encoding='utf-8') as file:
        file.write(content)

def process_files_in_directory(directory):
    """
    Cleans every *.caption and *.txt file found under a directory tree.

    The directory is searched recursively and each matching path is handed to
    remove_extra_spaces_and_newlines.

    Args:
        directory (str): The path to the directory to be processed.
    """
    # Pattern order matters: all *.caption matches are handled before the
    # *.txt search is even started.
    for suffix_pattern in ('*.caption', '*.txt'):
        search_pattern = os.path.join(directory, '**', suffix_pattern)
        for matched_path in glob.glob(search_pattern, recursive=True):
            remove_extra_spaces_and_newlines(matched_path)

if __name__ == "__main__":
    # Use the first command-line argument as the target when one is given;
    # otherwise fall back to the current working directory.
    target_directory = sys.argv[1] if len(sys.argv) > 1 else os.getcwd()

    process_files_in_directory(target_directory)