k4d3 committed on
Commit
3e238a0
1 Parent(s): 2c7b3e6

Signed-off-by: Balazs Horvath <[email protected]>

Files changed (1) hide show
  1. scripts/check_img_resolutions.py +78 -0
scripts/check_img_resolutions.py ADDED
@@ -0,0 +1,78 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This script checks the resolution of all images in a specified directory and its subdirectories.
3
+ If the resolution of an image exceeds a certain limit, the path of the image is written to an output file.
4
+ The script uses multiprocessing to speed up the process.
5
+ """
6
+
7
+ from pathlib import Path
8
+ import multiprocessing
9
+ import os
10
+ from PIL import Image
11
+
12
+
13
def check_image_resolution(filepath, output_file, max_pixels=16777216):
    """
    Checks the resolution of an image and appends its path to a file if it has too many pixels.

    Only files whose extension is .jpg, .jpeg or .png (compared case-insensitively)
    are inspected; every other path is ignored. Files that cannot be opened as an
    image are reported on stdout and skipped so that one bad file does not abort
    the whole scan.

    Parameters:
    filepath (Path): The path of the image file.
    output_file (str): The path of the output file where the paths of oversized images will be written.
    max_pixels (int): The largest allowed pixel count (width * height). Defaults to 16777216 (4096 * 4096).

    Returns:
    None
    """
    # Compare the lowered suffix so that "PHOTO.JPG" or "scan.PNG" are not silently skipped.
    if filepath.suffix.lower() not in (".jpg", ".jpeg", ".png"):
        return

    normalized_path = os.path.normpath(str(filepath))

    try:
        # The context manager closes the underlying file handle as soon as the
        # size has been read; only the header is needed, not the pixel data.
        with Image.open(filepath) as img:
            width, height = img.size
    except Image.DecompressionBombError as err:
        # Pillow refuses to open extremely large images. Such a file is exactly
        # what this check is looking for, so record it instead of crashing.
        print(
            f"The image {normalized_path} is too large for Pillow to open safely: {err}"
        )
    except OSError as err:
        # Covers missing/unreadable files and data Pillow cannot identify.
        print(f"Skipping {normalized_path}: it could not be read as an image ({err}).")
        return
    else:
        resolution = width * height
        if resolution <= max_pixels:
            return
        print(
            f"The image {normalized_path} has a resolution of {resolution} pixels which is more than {max_pixels} pixels."
        )

    with open(output_file, "a", encoding="utf-8") as f:
        f.write(f"{normalized_path}\n")
35
+
36
+
37
def process_directory(directory, output_file):
    """
    Processes all regular files in a directory and its subdirectories.

    Each file found is handed to check_image_resolution, which decides by file
    extension whether it is an image worth inspecting.

    Parameters:
    directory (str): The path of the directory to be processed.
    output_file (str): The path of the output file where the paths of oversized images will be written.

    Returns:
    None
    """
    for filepath in Path(directory).rglob("*"):
        # rglob("*") yields directories as well as files. A directory whose name
        # ends in an image extension (e.g. "renders.png") must not be opened as
        # an image, so only regular files are passed on.
        if filepath.is_file():
            check_image_resolution(filepath, output_file)
50
+
51
+
52
def main(output_file, directory=r"E:\training_dir"):
    """
    Main function that creates a pool of worker processes and runs process_directory in it.

    The directory scan is submitted to the pool as a single task, so the scan
    itself runs in one worker process. The call blocks until the scan has
    finished, and any exception raised inside the worker is re-raised here
    instead of being lost.

    Parameters:
    output_file (str): The path of the output file where the paths of oversized images will be written.
    directory (str): The path of the directory to scan. Defaults to the original hard-coded training directory.

    Returns:
    None
    """
    # Get the number of available CPU cores
    num_cores = multiprocessing.cpu_count()

    # The context manager guarantees the worker processes are cleaned up even
    # if the scan raises.
    with multiprocessing.Pool(num_cores) as pool:
        # apply() waits for the task to complete and propagates worker
        # exceptions; a discarded apply_async() result would swallow them.
        pool.apply(process_directory, args=(directory, output_file))
74
+
75
+
76
if __name__ == "__main__":
    # Paths of oversized images are appended to this file, relative to the
    # current working directory.
    OUTPUT_FILE = "oversized.txt"
    main(output_file=OUTPUT_FILE)