toolkit / remove_boys
k4d3's picture
update every stupid script
c2cc76d
raw
history blame
1.81 kB
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import os
import re
import sys
from pathlib import Path
def _strip_count_tags(content):
    """Return *content* with the person-count tags removed and commas tidied.

    The tags removed are a single digit 1-9 followed by ``boy``, ``boys``,
    ``girl`` or ``girls`` (for example ``1girl`` or ``2boys``).
    """
    # The optional trailing ``s`` is greedy, so a plural tag is consumed as a
    # whole. Listing the singular form first in a plain alternation would
    # match only "2boy" inside "2boys" and leave a stray "s" behind.
    tag = r'[1-9](?:boy|girl)s?'

    # Remove each tag together with the comma (and whitespace) preceding it.
    content = re.sub(r',\s*' + tag, '', content)
    # Remove tags that had no preceding comma, e.g. the first tag in the text.
    content = re.sub(tag, '', content)

    # NOTE: no re.MULTILINE is used, so ``^`` and ``$`` below anchor at the
    # start and end of the whole text, not at every line.
    content = re.sub(r',\s*,', ',', content)      # Collapse doubled commas
    content = re.sub(r',\s*$', '', content)       # Drop a trailing comma
    # Drop a leading comma along with the whitespace after it, so removing
    # the first tag does not leave the text starting with a space.
    content = re.sub(r'^\s*,\s*', '', content)
    content = re.sub(r'\s*,\s*', ', ', content)   # Normalize comma spacing

    # Second clean-up pass: the substitutions above are non-overlapping, so
    # runs of three or more commas can leave residue that is caught here.
    content = re.sub(r',\s*$', '', content)
    content = re.sub(r'^\s*,\s*', '', content)
    content = re.sub(r',\s*,', ',', content)
    return content


def remove_boys(target_dir='.'):
    """Strip person-count tags from every ``.txt`` file under *target_dir*.

    The directory is searched recursively. Each file is read as UTF-8,
    cleaned, and rewritten in place; files whose content does not change are
    left untouched.

    Args:
        target_dir: Directory to search. Defaults to the current directory.

    Returns:
        None.
    """
    for file_path in Path(target_dir).rglob('*.txt'):
        with open(file_path, 'r+', encoding='utf-8') as f:
            original = f.read()
            cleaned = _strip_count_tags(original)
            if cleaned == original:
                # Nothing to do; avoid a needless rewrite of the file.
                continue
            # Overwrite from the start and cut off any leftover tail, since
            # the cleaned text is never longer than what was read.
            f.seek(0)
            f.write(cleaned)
            f.truncate()
if __name__ == "__main__":
    # The first positional argument, when supplied, names the directory to
    # process; with no arguments the current directory is used.
    target_dir = '.' if len(sys.argv) <= 1 else sys.argv[1]
    remove_boys(target_dir)