k4d3 committed on
Commit
7ab3132
•
1 Parent(s): 594b99e

Signed-off-by: Balazs Horvath <[email protected]>

dataset_tools/Check for Transparency.ipynb DELETED
@@ -1,83 +0,0 @@
1
- {
2
- "cells": [
3
- {
4
- "cell_type": "markdown",
5
- "metadata": {},
6
- "source": [
7
- "## Check for Transparency\n",
8
- "----\n",
9
- "\n",
10
- "The Python script recursively traverses a specified directory, identifying image files with extensions `.jpg`, `.jpeg`, and `.png`. For each identified image, it checks if it contains transparency by examining its mode with PIL."
11
- ]
12
- },
13
- {
14
- "cell_type": "code",
15
- "execution_count": 2,
16
- "metadata": {},
17
- "outputs": [
18
- {
19
- "name": "stdout",
20
- "output_type": "stream",
21
- "text": [
22
- "No transparent images in your dataset!\n"
23
- ]
24
- }
25
- ],
26
- "source": [
27
- "import os\n",
28
- "from PIL import Image\n",
29
- "\n",
30
- "def check_transparency(image_path):\n",
31
- " try:\n",
32
- " image = Image.open(image_path)\n",
33
- " if image.mode == 'RGBA':\n",
34
- " return True\n",
35
- " except Exception as e:\n",
36
- " print(f\"Error processing {image_path}: {e}\")\n",
37
- " return False\n",
38
- "\n",
39
- "def main():\n",
40
- " directory = r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
41
- " transparent_images = []\n",
42
- "\n",
43
- " for root, _, files in os.walk(directory):\n",
44
- " for file in files:\n",
45
- " if file.lower().endswith(('.jpg', '.jpeg', '.png')):\n",
46
- " file_path = os.path.join(root, file)\n",
47
- " if check_transparency(file_path):\n",
48
- " transparent_images.append(file_path)\n",
49
- "\n",
50
- " if transparent_images:\n",
51
- " print(\"Images with transparency:\")\n",
52
- " for img in transparent_images:\n",
53
- " print(img)\n",
54
- " else:\n",
55
- " print(\"No transparent images in your dataset!\")\n",
56
- "\n",
57
- "if __name__ == \"__main__\":\n",
58
- " main()"
59
- ]
60
- }
61
- ],
62
- "metadata": {
63
- "kernelspec": {
64
- "display_name": "base",
65
- "language": "python",
66
- "name": "python3"
67
- },
68
- "language_info": {
69
- "codemirror_mode": {
70
- "name": "ipython",
71
- "version": 3
72
- },
73
- "file_extension": ".py",
74
- "mimetype": "text/x-python",
75
- "name": "python",
76
- "nbconvert_exporter": "python",
77
- "pygments_lexer": "ipython3",
78
- "version": "3.12.2"
79
- }
80
- },
81
- "nbformat": 4,
82
- "nbformat_minor": 2
83
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
dataset_tools/done/Check for Transparency.ipynb ADDED
The diff for this file is too large to render. See raw diff
 
dataset_tools/{Kill Transparency with Black.ipynb → done/Replace Transparency with Black.ipynb} RENAMED
@@ -4,10 +4,10 @@
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
- "## Kill Transparency with Black\n",
8
  "----\n",
9
  "\n",
10
- "This Python script utilizes the PIL (Python Imaging Library) to recursively traverse a specified directory, identifying image files with extensions `.jpg`, `.jpeg`, or `.png`, and adds a black layer behind the main layer of each image, effectively removing any existing transparency, before overwriting the original files with the modified versions."
11
  ]
12
  },
13
  {
@@ -18,8 +18,15 @@
18
  "source": [
19
  "import os\n",
20
  "from PIL import Image\n",
 
21
  "\n",
22
  "def add_black_layer(image_path):\n",
 
 
 
 
 
 
23
  " try:\n",
24
  " with Image.open(image_path) as img:\n",
25
  " black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
@@ -30,14 +37,18 @@
30
  " print(f\"Error processing {image_path}: {e}\")\n",
31
  "\n",
32
  "def process_directory(directory):\n",
33
- " for root, dirs, files in os.walk(directory):\n",
34
- " for file in files:\n",
35
- " if file.lower().endswith(('.jpg', '.jpeg', '.png')):\n",
36
- " file_path = os.path.join(root, file)\n",
37
- " add_black_layer(file_path)\n",
 
 
 
 
38
  "\n",
39
  "if __name__ == \"__main__\":\n",
40
- " directory = r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
41
  " process_directory(directory)"
42
  ]
43
  }
 
4
  "cell_type": "markdown",
5
  "metadata": {},
6
  "source": [
7
+ "## Replace Transparency with Black\n",
8
  "----\n",
9
  "\n",
10
+ "This Python script uses PIL (the Python Imaging Library) to recursively traverse a specified directory, identify image files with the `.png` extension, and add a black layer behind the main layer of each image, effectively removing any existing transparency, before overwriting the original files with the modified versions."
11
  ]
12
  },
13
  {
 
18
  "source": [
19
  "import os\n",
20
  "from PIL import Image\n",
21
+ "import glob\n",
22
  "\n",
23
  "def add_black_layer(image_path):\n",
24
+ " \"\"\"\n",
25
+ " Adds a black layer to the image at the given path and overwrites it.\n",
26
+ "\n",
27
+ " Parameters:\n",
28
+ " image_path (str): The file path to the image.\n",
29
+ " \"\"\"\n",
30
  " try:\n",
31
  " with Image.open(image_path) as img:\n",
32
  " black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
 
37
  " print(f\"Error processing {image_path}: {e}\")\n",
38
  "\n",
39
  "def process_directory(directory):\n",
40
+ " \"\"\"\n",
41
+ " Processes all .png images in the given directory and adds a black layer to them.\n",
42
+ "\n",
43
+ " Parameters:\n",
44
+ " directory (str): The directory path where the images are located.\n",
45
+ " \"\"\"\n",
46
+ " # Using glob to find all .png files in the directory recursively\n",
47
+ " for image_path in glob.glob(os.path.join(directory, '**', '*.png'), recursive=True):\n",
48
+ " add_black_layer(image_path)\n",
49
  "\n",
50
  "if __name__ == \"__main__\":\n",
51
+ " directory = r'E:\\training_dir'\n",
52
  " process_directory(directory)"
53
  ]
54
  }
dataset_tools/{e621 JSON to txt.ipynb → done/e621 JSON to txt.ipynb} RENAMED
@@ -26,7 +26,9 @@
26
  "metadata": {},
27
  "outputs": [],
28
  "source": [
29
- "# Define tags to be ignored using regular expressions for exact matching\n",
 
 
30
  "ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
31
  " r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
32
  " r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
@@ -976,16 +978,14 @@
976
  "console = Console()\n",
977
  "\n",
978
  "\n",
979
- "# Function to check if a tag should be ignored based on the ignored_tags list\n",
980
- "def should_ignore_tag(tag):\n",
981
  " for ignored_tag_pattern in ignored_tags:\n",
982
  " pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
983
- " if re.search(pattern, tag):\n",
984
  " return True\n",
985
  " return False\n",
986
  "\n",
987
  "\n",
988
- "# Function to process tags and determine if they should be ignored\n",
989
  "def process_tags(tags_dict):\n",
990
  " processed_tags = []\n",
991
  " for category, tags_list in tags_dict.items():\n",
@@ -1010,7 +1010,6 @@
1010
  " return processed_tags\n",
1011
  "\n",
1012
  "\n",
1013
- "# Modify the process_file function to use the updated process_tags function\n",
1014
  "def process_file(file_path):\n",
1015
  " try:\n",
1016
  " console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",
 
26
  "metadata": {},
27
  "outputs": [],
28
  "source": [
29
+ "\"\"\"\n",
30
+ "Define tags to be ignored using regular expressions for exact matching\n",
31
+ "\"\"\"\n",
32
  "ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
33
  " r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
34
  " r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
 
978
  "console = Console()\n",
979
  "\n",
980
  "\n",
981
+ "def should_ignore_tag(tag, all_tags):\n",
 
982
  " for ignored_tag_pattern in ignored_tags:\n",
983
  " pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
984
+ " if any(re.search(pattern, t) for t in all_tags):\n",
985
  " return True\n",
986
  " return False\n",
987
  "\n",
988
  "\n",
 
989
  "def process_tags(tags_dict):\n",
990
  " processed_tags = []\n",
991
  " for category, tags_list in tags_dict.items():\n",
 
1010
  " return processed_tags\n",
1011
  "\n",
1012
  "\n",
 
1013
  "def process_file(file_path):\n",
1014
  " try:\n",
1015
  " console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",