import argparse
import os
from collections import defaultdict
from typing import Dict, List, Optional

import cv2
import pandas as pd
import tqdm
from mivolo.data.data_reader import PictureInfo, get_all_files
from mivolo.model.yolo_detector import Detector, PersonAndFaceResult
from preparation_utils import get_additional_bboxes, get_main_face, save_annotations


def read_adience_annotations(annotations_files):
    annotations_per_image = {}
    stat_per_fold = defaultdict(int)
    cols = ["user_id", "original_image", "face_id", "age", "gender"]
    for file in annotations_files:
        fold_name = os.path.basename(file).split(".")[0]
        df = pd.read_csv(file, sep="\t", usecols=cols)
        for index, row in df.iterrows():
            face_id, img_name, user_id = row["face_id"], row["original_image"], row["user_id"]
            aligned_face_path = f"faces/{user_id}/coarse_tilt_aligned_face.{face_id}.{img_name}"
            age, gender = row["age"], row["gender"]
            gender = gender.upper() if isinstance(gender, str) and gender != "u" else None
            age = age if isinstance(age, str) else None
            annotations_per_image[aligned_face_path] = {"age": age, "gender": gender, "fold": fold_name}
            stat_per_fold[fold_name] += 1

    print(f"Per fold images: {stat_per_fold}")
    return annotations_per_image


def read_data(images_dir, annotations_files, data_dir) -> List[PictureInfo]:
    dataset_pictures: List[PictureInfo] = []

    all_images = get_all_files(images_dir)
    annotations_per_file = read_adience_annotations(annotations_files)

    total, missed = 0, 0
    stat_per_gender: Dict[str, int] = defaultdict(int)
    missed_gender, missed_age, missed_gender_and_age = 0, 0, 0
    stat_per_ages: Dict[str, int] = defaultdict(int)

    # final age classes: '0;2', "4;6", "8;12", "15;20", "25;32", "38;43", "48;53", "60;100"
    age_map = {
        "2": "(0, 2)",
        "3": "(0, 2)",
        "13": "(8, 12)",
        "(8, 23)": "(8, 12)",
        "22": "(15, 20)",
        "23": "(25, 32)",
        "29": "(25, 32)",
        "(27, 32)": "(25, 32)",
        "32": "(25, 32)",
        "34": "(25, 32)",
        "35": "(25, 32)",
        "36": "(38, 43)",
        "(38, 42)": "(38, 43)",
        "(38, 48)": "(38, 43)",
        "42": "(38, 43)",
        "45": "(38, 43)",
        "46": "(48, 53)",
        "55": "(48, 53)",
        "56": "(48, 53)",
        "57": "(60, 100)",
        "58": "(60, 100)",
    }

    for image_path in all_images:
        total += 1
        relative_path = image_path.replace(f"{data_dir}/", "")
        if relative_path not in annotations_per_file:
            missed += 1
            print("Can not find annotation for ", relative_path)
        else:
            annot = annotations_per_file[relative_path]
            age, gender = annot["age"], annot["gender"]

            if gender is None and age is not None:
                missed_gender += 1
            elif age is None and gender is not None:
                missed_age += 1
            elif gender is None and age is None:
                missed_gender_and_age += 1
                # skip such image
                continue

            if gender is not None:
                stat_per_gender[gender] += 1
            if age is not None:
                age = age_map[age] if age in age_map else age
                stat_per_ages[age] += 1

            dataset_pictures.append(PictureInfo(image_path, age, gender))

    print(f"Missed annots for images: {missed}/{total}")
    print(f"Missed genders: {missed_gender}")
    print(f"Missed ages: {missed_age}")
    print(f"Missed ages and gender: {missed_gender_and_age}")
    print(f"\nPer gender images: {stat_per_gender}")

    ages = list(stat_per_ages.keys())
    print(f"Per ages categories ({len(ages)} cats):")
    ages = sorted(ages, key=lambda x: int(x.split("(")[-1].split(",")[0].strip()))
    for age in ages:
        print(f"Age: {age} Count: {stat_per_ages[age]}")

    return dataset_pictures
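
# Illustrative layout expected under --dataset_path, reconstructed from the path patterns
# used above (the "data/adience" root is only an example; faces/ and fold_{i}_data.txt are
# the parts the script actually relies on):
#
#   data/adience/
#       fold_0_data.txt ... fold_4_data.txt   # tab-separated: user_id, original_image, face_id, age, gender
#       faces/
#           <user_id>/
#               coarse_tilt_aligned_face.<face_id>.<original_image>
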
"person_y0", "person_x1", "person_y1"] All person bboxes here will be set to [-1, -1, -1, -1] If detector_cfg is set, for each face bbox will be refined using detector. Also, other detected faces wil be written to txt file (needed for further preprocessing) """ # out directory for annotations out_dir = os.path.join(data_dir, "annotations") os.makedirs(out_dir, exist_ok=True) # load annotations images: List[PictureInfo] = read_data(faces_dir, annotations, data_dir) if detector_cfg: # detect faces with yolo detector faces_not_found, images_with_other_faces = 0, 0 other_faces: List[PictureInfo] = [] detector_weights, device = detector_cfg["weights"], detector_cfg["device"] detector = Detector(detector_weights, device, verbose=False, conf_thresh=0.1, iou_thresh=0.2) for image_info in tqdm.tqdm(images, desc="Detecting faces: "): cv_im = cv2.imread(image_info.image_path) im_h, im_w = cv_im.shape[:2] detected_objects: PersonAndFaceResult = detector.predict(cv_im) main_bbox, other_bboxes_inds = get_main_face(detected_objects) if main_bbox is None: # use a full image as face bbox faces_not_found += 1 image_info.bbox = [0, 0, im_w, im_h] else: image_info.bbox = main_bbox if len(other_bboxes_inds): images_with_other_faces += 1 additional_faces = get_additional_bboxes(detected_objects, other_bboxes_inds, image_info.image_path) other_faces.extend(additional_faces) print(f"Faces not detected: {faces_not_found}/{len(images)}") print(f"Images with other faces: {images_with_other_faces}/{len(images)}") print(f"Other faces: {len(other_faces)}") images = images + other_faces else: # use a full image as face bbox for image_info in tqdm.tqdm(images, desc="Collect face bboxes: "): cv_im = cv2.imread(image_info.image_path) im_h, im_w = cv_im.shape[:2] image_info.bbox = [0, 0, im_w, im_h] # xyxy save_annotations(images, faces_dir, out_file=os.path.join(out_dir, "adience_annotations.csv")) def get_parser(): parser = argparse.ArgumentParser(description="Adience") parser.add_argument( "--dataset_path", default="data/adience", type=str, required=True, help="path to dataset with faces/ and fold_{i}_data.txt files", ) parser.add_argument( "--detector_weights", default=None, type=str, required=False, help="path to face and person detector" ) parser.add_argument("--device", default="cuda:0", type=str, required=False, help="device to inference detector") return parser if __name__ == "__main__": parser = get_parser() args = parser.parse_args() data_dir = args.dataset_path faces_dir = os.path.join(data_dir, "faces") if data_dir[-1] == "/": data_dir = data_dir[:-1] annotations = [ os.path.join(data_dir, "fold_0_data.txt"), os.path.join(data_dir, "fold_1_data.txt"), os.path.join(data_dir, "fold_2_data.txt"), os.path.join(data_dir, "fold_3_data.txt"), os.path.join(data_dir, "fold_4_data.txt"), ] detector_cfg: Optional[Dict[str, str]] = None if args.detector_weights is not None: detector_cfg = {"weights": args.detector_weights, "device": "cuda:0"} main(faces_dir, annotations, data_dir, detector_cfg)