File size: 1,814 Bytes
eb1fe03
c2cc76d
eb1fe03
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import sys
from pathlib import Path

# A whole "count" tag such as 1boy, 3boys, 1girl or 9girls.  The optional
# plural "s" is part of the same alternative so "2boys" is consumed entirely,
# and the word boundaries keep look-alikes such as "11boys" untouched.
_COUNT_TAG = r'\b[1-9](?:boy|girl)s?\b'
_TAG_WITH_LEADING_COMMA = re.compile(r',[ \t]*' + _COUNT_TAG)
_BARE_TAG = re.compile(_COUNT_TAG)
_EMPTY_SLOTS = re.compile(r',(?:[ \t]*,)+')
_LEADING_COMMA = re.compile(r'^[ \t]*,[ \t]*', re.MULTILINE)
_TRAILING_COMMA = re.compile(r'[ \t]*,[ \t]*$', re.MULTILINE)
_COMMA_SPACING = re.compile(r'[ \t]*,[ \t]*')


def _strip_count_tags(text: str) -> str:
    """Return *text* without boy/girl count tags and with tidy comma lists."""
    # Prefer removing the tag together with the comma that precedes it, then
    # sweep up any tag that had no preceding comma (e.g. first on its line).
    text = _TAG_WITH_LEADING_COMMA.sub('', text)
    text = _BARE_TAG.sub('', text)
    # Removal can leave empty slots (", ,"), or a comma dangling at either
    # end of a line.  Only horizontal whitespace is consumed so that line
    # breaks in multi-line files are preserved.
    text = _EMPTY_SLOTS.sub(',', text)
    text = _LEADING_COMMA.sub('', text)
    text = _TRAILING_COMMA.sub('', text)
    # Finally normalise every remaining separator to ", ".
    return _COMMA_SPACING.sub(', ', text)


def remove_boys(target_dir='.'):
    """Strip ``[1-9]boy(s)`` / ``[1-9]girl(s)`` tags from ``*.txt`` files.

    Every ``.txt`` file under *target_dir* (searched recursively) is read as
    UTF-8, the count tags are removed, the comma-separated list is cleaned up
    line by line (no doubled, leading or trailing commas; ``", "`` between
    entries), and the file is written back in place.  Files whose content
    would not change are left untouched.

    Args:
        target_dir: Directory to process; defaults to the current directory.

    Raises:
        OSError: If a file cannot be read or written.
        UnicodeDecodeError: If a file is not valid UTF-8.
    """
    for file_path in Path(target_dir).rglob('*.txt'):
        # rglob matches on the name only, so a directory called "x.txt"
        # would otherwise make the read below fail.
        if not file_path.is_file():
            continue
        original = file_path.read_text(encoding='utf-8')
        cleaned = _strip_count_tags(original)
        if cleaned != original:
            file_path.write_text(cleaned, encoding='utf-8')

if __name__ == "__main__":
    # An optional first command-line argument names the directory to clean;
    # without one, the current working directory is processed.
    cli_args = sys.argv[1:]
    remove_boys(cli_args[0] if cli_args else '.')