awoo
Browse filesSigned-off-by: Balazs Horvath <[email protected]>
dataset_tools/Check for Transparency.ipynb
DELETED
@@ -1,83 +0,0 @@
|
|
1 |
-
{
|
2 |
-
"cells": [
|
3 |
-
{
|
4 |
-
"cell_type": "markdown",
|
5 |
-
"metadata": {},
|
6 |
-
"source": [
|
7 |
-
"## Check for Transparency\n",
|
8 |
-
"----\n",
|
9 |
-
"\n",
|
10 |
-
"The Python script recursively traverses a specified directory, identifying image files with extensions `.jpg`, `.jpeg`, and `.png`. For each identified image, it checks if it contains transparency by examining its mode with PIL."
|
11 |
-
]
|
12 |
-
},
|
13 |
-
{
|
14 |
-
"cell_type": "code",
|
15 |
-
"execution_count": 2,
|
16 |
-
"metadata": {},
|
17 |
-
"outputs": [
|
18 |
-
{
|
19 |
-
"name": "stdout",
|
20 |
-
"output_type": "stream",
|
21 |
-
"text": [
|
22 |
-
"No transparent images in your dataset!\n"
|
23 |
-
]
|
24 |
-
}
|
25 |
-
],
|
26 |
-
"source": [
|
27 |
-
"import os\n",
|
28 |
-
"from PIL import Image\n",
|
29 |
-
"\n",
|
30 |
-
"def check_transparency(image_path):\n",
|
31 |
-
" try:\n",
|
32 |
-
" image = Image.open(image_path)\n",
|
33 |
-
" if image.mode == 'RGBA':\n",
|
34 |
-
" return True\n",
|
35 |
-
" except Exception as e:\n",
|
36 |
-
" print(f\"Error processing {image_path}: {e}\")\n",
|
37 |
-
" return False\n",
|
38 |
-
"\n",
|
39 |
-
"def main():\n",
|
40 |
-
" directory = r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
|
41 |
-
" transparent_images = []\n",
|
42 |
-
"\n",
|
43 |
-
" for root, _, files in os.walk(directory):\n",
|
44 |
-
" for file in files:\n",
|
45 |
-
" if file.lower().endswith(('.jpg', '.jpeg', '.png')):\n",
|
46 |
-
" file_path = os.path.join(root, file)\n",
|
47 |
-
" if check_transparency(file_path):\n",
|
48 |
-
" transparent_images.append(file_path)\n",
|
49 |
-
"\n",
|
50 |
-
" if transparent_images:\n",
|
51 |
-
" print(\"Images with transparency:\")\n",
|
52 |
-
" for img in transparent_images:\n",
|
53 |
-
" print(img)\n",
|
54 |
-
" else:\n",
|
55 |
-
" print(\"No transparent images in your dataset!\")\n",
|
56 |
-
"\n",
|
57 |
-
"if __name__ == \"__main__\":\n",
|
58 |
-
" main()"
|
59 |
-
]
|
60 |
-
}
|
61 |
-
],
|
62 |
-
"metadata": {
|
63 |
-
"kernelspec": {
|
64 |
-
"display_name": "base",
|
65 |
-
"language": "python",
|
66 |
-
"name": "python3"
|
67 |
-
},
|
68 |
-
"language_info": {
|
69 |
-
"codemirror_mode": {
|
70 |
-
"name": "ipython",
|
71 |
-
"version": 3
|
72 |
-
},
|
73 |
-
"file_extension": ".py",
|
74 |
-
"mimetype": "text/x-python",
|
75 |
-
"name": "python",
|
76 |
-
"nbconvert_exporter": "python",
|
77 |
-
"pygments_lexer": "ipython3",
|
78 |
-
"version": "3.12.2"
|
79 |
-
}
|
80 |
-
},
|
81 |
-
"nbformat": 4,
|
82 |
-
"nbformat_minor": 2
|
83 |
-
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
dataset_tools/done/Check for Transparency.ipynb
ADDED
The diff for this file is too large to render.
See raw diff
|
|
dataset_tools/{Kill Transparency with Black.ipynb → done/Replace Transparency with Black.ipynb}
RENAMED
@@ -4,10 +4,10 @@
|
|
4 |
"cell_type": "markdown",
|
5 |
"metadata": {},
|
6 |
"source": [
|
7 |
-
"##
|
8 |
"----\n",
|
9 |
"\n",
|
10 |
-
"This Python script utilizes the PIL (Python Imaging Library) to recursively traverse a specified directory, identifying image files with
|
11 |
]
|
12 |
},
|
13 |
{
|
@@ -18,8 +18,15 @@
|
|
18 |
"source": [
|
19 |
"import os\n",
|
20 |
"from PIL import Image\n",
|
|
|
21 |
"\n",
|
22 |
"def add_black_layer(image_path):\n",
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
" try:\n",
|
24 |
" with Image.open(image_path) as img:\n",
|
25 |
" black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
|
@@ -30,14 +37,18 @@
|
|
30 |
" print(f\"Error processing {image_path}: {e}\")\n",
|
31 |
"\n",
|
32 |
"def process_directory(directory):\n",
|
33 |
-
"
|
34 |
-
"
|
35 |
-
"
|
36 |
-
"
|
37 |
-
"
|
|
|
|
|
|
|
|
|
38 |
"\n",
|
39 |
"if __name__ == \"__main__\":\n",
|
40 |
-
" directory = r'
|
41 |
" process_directory(directory)"
|
42 |
]
|
43 |
}
|
|
|
4 |
"cell_type": "markdown",
|
5 |
"metadata": {},
|
6 |
"source": [
|
7 |
+
"## Replace Transparency with Black\n",
|
8 |
"----\n",
|
9 |
"\n",
|
10 |
+
"This Python script utilizes the PIL (Python Imaging Library) to recursively traverse a specified directory, identifying image files with extension `.png`, and adds a black layer behind the main layer of each image, effectively removing any existing transparency, before overwriting the original files with the modified versions."
|
11 |
]
|
12 |
},
|
13 |
{
|
|
|
18 |
"source": [
|
19 |
"import os\n",
|
20 |
"from PIL import Image\n",
|
21 |
+
"import glob\n",
|
22 |
"\n",
|
23 |
"def add_black_layer(image_path):\n",
|
24 |
+
" \"\"\"\n",
|
25 |
+
" Adds a black layer to the image at the given path and overwrites it.\n",
|
26 |
+
"\n",
|
27 |
+
" Parameters:\n",
|
28 |
+
" image_path (str): The file path to the image.\n",
|
29 |
+
" \"\"\"\n",
|
30 |
" try:\n",
|
31 |
" with Image.open(image_path) as img:\n",
|
32 |
" black_layer = Image.new('RGB', img.size, (0, 0, 0))\n",
|
|
|
37 |
" print(f\"Error processing {image_path}: {e}\")\n",
|
38 |
"\n",
|
39 |
"def process_directory(directory):\n",
|
40 |
+
" \"\"\"\n",
|
41 |
+
" Processes all .png images in the given directory and adds a black layer to them.\n",
|
42 |
+
"\n",
|
43 |
+
" Parameters:\n",
|
44 |
+
" directory (str): The directory path where the images are located.\n",
|
45 |
+
" \"\"\"\n",
|
46 |
+
" # Using glob to find all .png files in the directory recursively\n",
|
47 |
+
" for image_path in glob.glob(os.path.join(directory, '**', '*.png'), recursive=True):\n",
|
48 |
+
" add_black_layer(image_path)\n",
|
49 |
"\n",
|
50 |
"if __name__ == \"__main__\":\n",
|
51 |
+
" directory = r'E:\\training_dir'\n",
|
52 |
" process_directory(directory)"
|
53 |
]
|
54 |
}
|
dataset_tools/{e621 JSON to txt.ipynb → done/e621 JSON to txt.ipynb}
RENAMED
@@ -26,7 +26,9 @@
|
|
26 |
"metadata": {},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
-
"
|
|
|
|
|
30 |
"ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
|
31 |
" r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
|
32 |
" r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
|
@@ -976,16 +978,14 @@
|
|
976 |
"console = Console()\n",
|
977 |
"\n",
|
978 |
"\n",
|
979 |
-
"
|
980 |
-
"def should_ignore_tag(tag):\n",
|
981 |
" for ignored_tag_pattern in ignored_tags:\n",
|
982 |
" pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
|
983 |
-
" if re.search(pattern,
|
984 |
" return True\n",
|
985 |
" return False\n",
|
986 |
"\n",
|
987 |
"\n",
|
988 |
-
"# Function to process tags and determine if they should be ignored\n",
|
989 |
"def process_tags(tags_dict):\n",
|
990 |
" processed_tags = []\n",
|
991 |
" for category, tags_list in tags_dict.items():\n",
|
@@ -1010,7 +1010,6 @@
|
|
1010 |
" return processed_tags\n",
|
1011 |
"\n",
|
1012 |
"\n",
|
1013 |
-
"# Modify the process_file function to use the updated process_tags function\n",
|
1014 |
"def process_file(file_path):\n",
|
1015 |
" try:\n",
|
1016 |
" console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",
|
|
|
26 |
"metadata": {},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
+
"\"\"\"\n",
|
30 |
+
"Define tags to be ignored using regular expressions for exact matching\n",
|
31 |
+
"\"\"\"\n",
|
32 |
"ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
|
33 |
" r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
|
34 |
" r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
|
|
|
978 |
"console = Console()\n",
|
979 |
"\n",
|
980 |
"\n",
|
981 |
+
"def should_ignore_tag(tag, all_tags):\n",
|
|
|
982 |
" for ignored_tag_pattern in ignored_tags:\n",
|
983 |
" pattern = re.compile(ignored_tag_pattern, re.VERBOSE | re.IGNORECASE)\n",
|
984 |
+
" if any(re.search(pattern, t) for t in all_tags):\n",
|
985 |
" return True\n",
|
986 |
" return False\n",
|
987 |
"\n",
|
988 |
"\n",
|
|
|
989 |
"def process_tags(tags_dict):\n",
|
990 |
" processed_tags = []\n",
|
991 |
" for category, tags_list in tags_dict.items():\n",
|
|
|
1010 |
" return processed_tags\n",
|
1011 |
"\n",
|
1012 |
"\n",
|
|
|
1013 |
"def process_file(file_path):\n",
|
1014 |
" try:\n",
|
1015 |
" console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",
|