{ "cells": [ { "cell_type": "markdown", "metadata": {}, "source": [ "## Escape parentheses\n", "----\n", "\n", "This script is designed to process a directory and its subdirectories for text files. For each text file, it reads the content and checks for any unescaped parentheses. If it finds any, it escapes them and writes the modified content back to the file. If any modifications are made, it prints a warning message along with the original and modified content. The directory to be processed is specified by the `directory_path` variable. In this case, it is set to `C:\\Users\\kade\\Desktop\\training_dir_staging`. The script starts processing from this directory. \n", "\n", "The **`escape_parentheses(file_path)`** function takes a file path as an argument, reads the file content, and checks for unescaped parentheses. It uses regular expressions to find unescaped parentheses and escapes them. If the content is modified, it prints a warning message along with the original and modified content. Then, it writes the modified content back to the file.\n", "\n", "The **`process_directory(directory)`** function takes a directory path as an argument and processes it recursively. It uses the `os.walk()` function to iterate over the directory, its subdirectories, and files. For each text file (files ending with `.txt`), it calls the `escape_parentheses(file_path)` function to escape unescaped parentheses. It also processes each subdirectory in the same way. " ] }, { "cell_type": "code", "execution_count": 1, "metadata": {}, "outputs": [], "source": [ "import os\n", "import re\n", "\n", "def escape_parentheses(file_path):\n", " with open(file_path, \"r\") as file:\n", " content = file.read()\n", " original_content = content\n", "\n", " # Escape unescaped opening parentheses\n", " content = re.sub(r\"(? 31\u001b[0m \u001b[43mprocess_directory\u001b[49m\u001b[43m(\u001b[49m\u001b[43mdirectory_path\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[1;32mIn[8], line 27\u001b[0m, in \u001b[0;36mprocess_directory\u001b[1;34m(directory)\u001b[0m\n\u001b[0;32m 25\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mprocess_directory\u001b[39m(directory):\n\u001b[0;32m 26\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m file_path \u001b[38;5;129;01min\u001b[39;00m Path(directory)\u001b[38;5;241m.\u001b[39mrglob(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m*.txt\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m---> 27\u001b[0m \u001b[43mescape_parentheses\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;28;43mstr\u001b[39;49m\u001b[43m(\u001b[49m\u001b[43mfile_path\u001b[49m\u001b[43m)\u001b[49m\u001b[43m)\u001b[49m\n", "Cell \u001b[1;32mIn[8], line 11\u001b[0m, in \u001b[0;36mescape_parentheses\u001b[1;34m(file_path)\u001b[0m\n\u001b[0;32m 8\u001b[0m original_content \u001b[38;5;241m=\u001b[39m content\n\u001b[0;32m 10\u001b[0m \u001b[38;5;66;03m# Escape unescaped opening parentheses\u001b[39;00m\n\u001b[1;32m---> 11\u001b[0m content \u001b[38;5;241m=\u001b[39m \u001b[43mre\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43msub\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;124;43mr\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m(? 186\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43m_compile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\u001b[38;5;241m.\u001b[39msub(repl, string, count)\n", "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.12_3.12.1008.0_x64__qbz5n2kfra8p0\\Lib\\re\\__init__.py:307\u001b[0m, in \u001b[0;36m_compile\u001b[1;34m(pattern, flags)\u001b[0m\n\u001b[0;32m 301\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mwarnings\u001b[39;00m\n\u001b[0;32m 302\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mThe re.TEMPLATE/re.T flag is deprecated \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 303\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mas it is an undocumented flag \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 304\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mwithout an obvious purpose. \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[0;32m 305\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mDon\u001b[39m\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mt use it.\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 306\u001b[0m \u001b[38;5;167;01mDeprecationWarning\u001b[39;00m)\n\u001b[1;32m--> 307\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43m_compiler\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mcompile\u001b[49m\u001b[43m(\u001b[49m\u001b[43mpattern\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 308\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m flags \u001b[38;5;241m&\u001b[39m DEBUG:\n\u001b[0;32m 309\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m p\n", "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.12_3.12.1008.0_x64__qbz5n2kfra8p0\\Lib\\re\\_compiler.py:745\u001b[0m, in \u001b[0;36mcompile\u001b[1;34m(p, flags)\u001b[0m\n\u001b[0;32m 743\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m isstring(p):\n\u001b[0;32m 744\u001b[0m pattern \u001b[38;5;241m=\u001b[39m p\n\u001b[1;32m--> 745\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43m_parser\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mparse\u001b[49m\u001b[43m(\u001b[49m\u001b[43mp\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m)\u001b[49m\n\u001b[0;32m 746\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m 747\u001b[0m pattern \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m\n", "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.12_3.12.1008.0_x64__qbz5n2kfra8p0\\Lib\\re\\_parser.py:979\u001b[0m, in \u001b[0;36mparse\u001b[1;34m(str, flags, state)\u001b[0m\n\u001b[0;32m 976\u001b[0m state\u001b[38;5;241m.\u001b[39mflags \u001b[38;5;241m=\u001b[39m flags\n\u001b[0;32m 977\u001b[0m state\u001b[38;5;241m.\u001b[39mstr \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mstr\u001b[39m\n\u001b[1;32m--> 979\u001b[0m p \u001b[38;5;241m=\u001b[39m \u001b[43m_parse_sub\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mflags\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m&\u001b[39;49m\u001b[43m \u001b[49m\u001b[43mSRE_FLAG_VERBOSE\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0\u001b[39;49m\u001b[43m)\u001b[49m\n\u001b[0;32m 980\u001b[0m p\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mflags \u001b[38;5;241m=\u001b[39m fix_flags(\u001b[38;5;28mstr\u001b[39m, p\u001b[38;5;241m.\u001b[39mstate\u001b[38;5;241m.\u001b[39mflags)\n\u001b[0;32m 982\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m source\u001b[38;5;241m.\u001b[39mnext \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n", "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.12_3.12.1008.0_x64__qbz5n2kfra8p0\\Lib\\re\\_parser.py:460\u001b[0m, in \u001b[0;36m_parse_sub\u001b[1;34m(source, state, verbose, nested)\u001b[0m\n\u001b[0;32m 458\u001b[0m start \u001b[38;5;241m=\u001b[39m source\u001b[38;5;241m.\u001b[39mtell()\n\u001b[0;32m 459\u001b[0m \u001b[38;5;28;01mwhile\u001b[39;00m \u001b[38;5;28;01mTrue\u001b[39;00m:\n\u001b[1;32m--> 460\u001b[0m itemsappend(\u001b[43m_parse\u001b[49m\u001b[43m(\u001b[49m\u001b[43msource\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstate\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mverbose\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[0;32m 461\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mnested\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mand\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mnot\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[43mitems\u001b[49m\u001b[43m)\u001b[49m)\n\u001b[0;32m 462\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m sourcematch(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m|\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[0;32m 463\u001b[0m \u001b[38;5;28;01mbreak\u001b[39;00m\n", "File \u001b[1;32mC:\\Program Files\\WindowsApps\\PythonSoftwareFoundation.Python.3.12_3.12.1008.0_x64__qbz5n2kfra8p0\\Lib\\re\\_parser.py:864\u001b[0m, in \u001b[0;36m_parse\u001b[1;34m(source, state, verbose, nested, first)\u001b[0m\n\u001b[0;32m 862\u001b[0m p \u001b[38;5;241m=\u001b[39m _parse_sub(source, state, sub_verbose, nested \u001b[38;5;241m+\u001b[39m \u001b[38;5;241m1\u001b[39m)\n\u001b[0;32m 863\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m source\u001b[38;5;241m.\u001b[39mmatch(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m)\u001b[39m\u001b[38;5;124m\"\u001b[39m):\n\u001b[1;32m--> 864\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m source\u001b[38;5;241m.\u001b[39merror(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mmissing ), unterminated subpattern\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[0;32m 865\u001b[0m source\u001b[38;5;241m.\u001b[39mtell() \u001b[38;5;241m-\u001b[39m start)\n\u001b[0;32m 866\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m group \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m 867\u001b[0m state\u001b[38;5;241m.\u001b[39mclosegroup(group, p)\n", "\u001b[1;31merror\u001b[0m: missing ), unterminated subpattern at position 26" ] } ], "source": [ "import os\n", "import re\n", "\n", "\n", "def escape_parentheses(file_path):\n", " with open(file_path, \"r\") as file:\n", " content = file.read()\n", " original_content = content\n", "\n", " # Escape unescaped opening parentheses\n", " content = re.sub(r\"(?