|
""" |
|
Check the image files in a dataset directory and report any that have an unusual file size or unusual dimensions.
|
""" |
|
|
|
import os |
|
from PIL import Image |
|
import pandas as pd |
|
import argparse |
|
from tqdm import tqdm |
|
from typing import Union |
|
|
|
|
|
def main(args):
    """Scan a directory of images and report their file sizes and dimensions.

    Every ``.jpg``/``.jpeg``/``.png`` file (extension matched
    case-insensitively) located directly inside ``args.dataset_path`` is
    opened with Pillow to read its pixel dimensions, and its size on disk is
    recorded. Files that cannot be read are skipped and listed on stdout
    instead of aborting the scan. Summary statistics of the file sizes are
    printed at the end, and the per-file report is optionally written as CSV.

    Args:
        args: Parsed command-line arguments providing
            ``dataset_path`` (directory containing the images to check) and
            ``create_dataframe`` (path of the CSV report to write; a falsy
            value skips writing the report).
    """
    dataset_path = args.dataset_path
    image_extensions = (".jpg", ".jpeg", ".png")
    # Images whose pixel area is below 100x100 are flagged as small.
    small_area_threshold = 100 * 100

    fpaths = []
    sizes = []
    dimensions = []
    size_less_than_100X100 = []
    unreadable = []

    # Sort the listing so the report order is reproducible; the order
    # returned by os.listdir() is arbitrary.
    for filename in tqdm(sorted(os.listdir(dataset_path))):
        # Compare on the lower-cased name so "IMG.JPG" is not silently missed.
        if not filename.lower().endswith(image_extensions):
            continue

        image_path = os.path.join(dataset_path, filename)
        try:
            file_size = os.path.getsize(image_path)
            with Image.open(image_path) as img:
                width, height = img.size
        except OSError as err:
            # Missing, unreadable or unidentifiable files surface as OSError
            # (or a subclass); remember them and keep scanning.
            unreadable.append((image_path, err))
            continue

        # Append to every column only after all reads succeeded, so the
        # lists always have the same length when the DataFrame is built.
        fpaths.append(image_path)
        sizes.append(file_size)
        dimensions.append((width, height))
        size_less_than_100X100.append(int(width * height < small_area_threshold))

    for image_path, err in unreadable:
        print(f"Skipped unreadable image {image_path}: {err}")

    if args.create_dataframe:
        df = pd.DataFrame({
            "fpath": fpaths,
            "img_size": sizes,
            "dimensions": dimensions,
            "small_size": size_less_than_100X100,
        })

        df.to_csv(args.create_dataframe, index=False)
        print(f"Dataframe saved at {args.create_dataframe}.")

    # Summary statistics (count, mean, quartiles, ...) of the file sizes in bytes.
    print(pd.Series(sizes).describe())
|
|
|
if __name__ == "__main__":
    # Declare the command-line interface, then hand the parsed options to main().
    cli = argparse.ArgumentParser()
    cli.add_argument(
        "--dataset-path",
        type=str,
        required=True,
        help="Path of the dataset of images to be checked.",
    )
    cli.add_argument(
        "--create-dataframe",
        type=str,
        default="report_imgs_size.csv",
        help="Name of the dataframe if you want to create.",
    )

    args = cli.parse_args()
    main(args)