awoo

Browse files

Signed-off-by: Balazs Horvath <[email protected]>

Files changed (4) hide show

dataset_tools/Check for Transparency.ipynb +0 -83
dataset_tools/done/Check for Transparency.ipynb +0 -0
dataset_tools/{Kill Transparency with Black.ipynb → done/Replace Transparency with Black.ipynb} +19 -8
dataset_tools/{e621 JSON to txt.ipynb → done/e621 JSON to txt.ipynb} +5 -6

dataset_tools/Check for Transparency.ipynb DELETED Viewed

@@ -1,83 +0,0 @@
-{
- "cells": [
-  {
-   "cell_type": "markdown",
-   "metadata": {},
-   "source": [
-    "## Check for Transparency\n",
-    "----\n",
-    "\n",
-    "This Python script recursively traverses a specified directory, identifying image files with the extensions `.jpg`, `.jpeg`, and `.png`. For each image it finds, it opens the file with PIL and reports the image as transparent when its mode is `RGBA`."
-   ]
-  },
-  {
-   "cell_type": "code",
-   "execution_count": 2,
-   "metadata": {},
-   "outputs": [
-    {
-     "name": "stdout",
-     "output_type": "stream",
-     "text": [
-      "No transparent images in your dataset!\n"
-     ]
-    }
-   ],
-   "source": [
-    "import os\n",
-    "from PIL import Image\n",
-    "\n",
-    "def check_transparency(image_path):\n",
-    "    try:\n",
-    "        image = Image.open(image_path)\n",
-    "        if image.mode == 'RGBA':\n",
-    "            return True\n",
-    "    except Exception as e:\n",
-    "        print(f\"Error processing {image_path}: {e}\")\n",
-    "    return False\n",
-    "\n",
-    "def main():\n",
-    "    directory = r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
-    "    transparent_images = []\n",
-    "\n",
-    "    for root, _, files in os.walk(directory):\n",
-    "        for file in files:\n",
-    "            if file.lower().endswith(('.jpg', '.jpeg', '.png')):\n",
-    "                file_path = os.path.join(root, file)\n",
-    "                if check_transparency(file_path):\n",
-    "                    transparent_images.append(file_path)\n",
-    "\n",
-    "    if transparent_images:\n",
-    "        print(\"Images with transparency:\")\n",
-    "        for img in transparent_images:\n",
-    "            print(img)\n",
-    "    else:\n",
-    "        print(\"No transparent images in your dataset!\")\n",
-    "\n",
-    "if __name__ == \"__main__\":\n",
-    "    main()"
-   ]
-  }
- ],
- "metadata": {
-  "kernelspec": {
-   "display_name": "base",
-   "language": "python",
-   "name": "python3"
-  },
-  "language_info": {
-   "codemirror_mode": {
-    "name": "ipython",
-    "version": 3
-   },
-   "file_extension": ".py",
-   "mimetype": "text/x-python",
-   "name": "python",
-   "nbconvert_exporter": "python",
-   "pygments_lexer": "ipython3",
-   "version": "3.12.2"
-  }
- },
- "nbformat": 4,
- "nbformat_minor": 2
-}

dataset_tools/done/Check for Transparency.ipynb ADDED Viewed

The diff for this file is too large to render. See raw diff

dataset_tools/{Kill Transparency with Black.ipynb → done/Replace Transparency with Black.ipynb} RENAMED Viewed

@@ -4,10 +4,10 @@
    "cell_type": "markdown",
    "metadata": {},
    "source": [
-    "## Kill Transparency with Black\n",
     "----\n",
     "\n",
-    "This Python script utilizes the PIL (Python Imaging Library) to recursively traverse a specified directory, identifying image files with extensions `.jpg`, `.jpeg`, or `.png`, and adds a black layer behind the main layer of each image, effectively removing any existing transparency, before overwriting the original files with the modified versions."
    ]
   },
   {
@@ -18,8 +18,15 @@
    "source": [
     "import os\n",
     "from PIL import Image\n",
     "\n",
     "def add_black_layer(image_path):\n",
     "    try:\n",
     "        with Image.open(image_path) as img:\n",
     "            black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
@@ -30,14 +37,18 @@
     "        print(f\"Error processing {image_path}: {e}\")\n",
     "\n",
     "def process_directory(directory):\n",
-    "    for root, dirs, files in os.walk(directory):\n",
-    "        for file in files:\n",
-    "            if file.lower().endswith(('.jpg', '.jpeg', '.png')):\n",
-    "                file_path = os.path.join(root, file)\n",
-    "                add_black_layer(file_path)\n",
     "\n",
     "if __name__ == \"__main__\":\n",
-    "    directory = r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
     "    process_directory(directory)"
    ]
   }

    "cell_type": "markdown",
    "metadata": {},
    "source": [
+    "## Replace Transparency with Black\n",
     "----\n",
     "\n",
+    "This Python script recursively searches a specified directory for `.png` files and uses PIL (the Python Imaging Library) to add a black layer behind the main layer of each image, effectively removing any existing transparency, before overwriting the original file with the modified version."
    ]
   },
   {
    "source": [
     "import os\n",
     "from PIL import Image\n",
+    "import glob\n",
     "\n",
     "def add_black_layer(image_path):\n",
+    "    \"\"\"\n",
+    "    Adds a black layer to the image at the given path and overwrites it.\n",
+    "\n",
+    "    Parameters:\n",
+    "    image_path (str): The file path to the image.\n",
+    "    \"\"\"\n",
     "    try:\n",
     "        with Image.open(image_path) as img:\n",
     "            black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
     "        print(f\"Error processing {image_path}: {e}\")\n",
     "\n",
     "def process_directory(directory):\n",
+    "    \"\"\"\n",
+    "    Processes all .png images in the given directory and its subdirectories, adding a black layer to each.\n",
+    "\n",
+    "    Parameters:\n",
+    "    directory (str): The directory path where the images are located.\n",
+    "    \"\"\"\n",
+    "    # Using glob to find all .png files in the directory recursively\n",
+    "    for image_path in glob.glob(os.path.join(directory, '**', '*.png'), recursive=True):\n",
+    "        add_black_layer(image_path)\n",
     "\n",
     "if __name__ == \"__main__\":\n",
+    "    directory = r'E:\\training_dir'\n",
     "    process_directory(directory)"
    ]
   }

dataset_tools/{e621 JSON to txt.ipynb → done/e621 JSON to txt.ipynb} RENAMED Viewed

@@ -26,7 +26,9 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "# Define tags to be ignored using regular expressions for exact matching\n",
     "ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
     "    r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
     "    r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
@@ -976,16 +978,14 @@
     "console = Console()\n",
     "\n",
     "\n",
-    "# Function to check if a tag should be ignored based on the ignored_tags list\n",
-    "def should_ignore_tag(tag):\n",
     "    for ignored_tag_pattern in ignored_tags:\n",
     "        pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
-    "        if re.search(pattern, tag):\n",
     "            return True\n",
     "    return False\n",
     "\n",
     "\n",
-    "# Function to process tags and determine if they should be ignored\n",
     "def process_tags(tags_dict):\n",
     "    processed_tags = []\n",
     "    for category, tags_list in tags_dict.items():\n",
@@ -1010,7 +1010,6 @@
     "    return processed_tags\n",
     "\n",
     "\n",
-    "# Modify the process_file function to use the updated process_tags function\n",
     "def process_file(file_path):\n",
     "    try:\n",
     "        console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",

    "metadata": {},
    "outputs": [],
    "source": [
+    "\"\"\"\n",
+    "Define tags to be ignored using regular expressions for exact matching\n",
+    "\"\"\"\n",
     "ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
     "    r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
     "    r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
     "console = Console()\n",
     "\n",
     "\n",
+    "def should_ignore_tag(tag, all_tags):\n",
     "    for ignored_tag_pattern in ignored_tags:\n",
     "        pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
+    "        if any(re.search(pattern, t) for t in all_tags):\n",
     "            return True\n",
     "    return False\n",
     "\n",
     "\n",
     "def process_tags(tags_dict):\n",
     "    processed_tags = []\n",
     "    for category, tags_list in tags_dict.items():\n",
     "    return processed_tags\n",
     "\n",
     "\n",
     "def process_file(file_path):\n",
     "    try:\n",
     "        console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",