import csv
import json
import os
import random

import numpy as np
import pandas as pd
import torch
import torch.multiprocessing as multiprocessing
import torchvision.transforms as transforms
from PIL import Image
from torch import autocast
from torch.utils.data import DataLoader, Dataset
from torchvision.transforms import InterpolationMode
from tqdm import tqdm

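# Batch image tagger: one process streams images through a multi-label
# classifier on the GPU while a second process drains the results queue
# and writes tag rows to CSV, so that file I/O does not stall inference.
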
# Allow TF32 on Ampere and newer GPUs for faster matmuls and convolutions.
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True

# Inference only: keep autograd anomaly detection off and let cuDNN
# benchmark kernels for the fixed 224x224 input size.
torch.autograd.set_detect_anomaly(False)
torch.backends.cudnn.benchmark = True

class ImageDataset(Dataset):

    def __init__(self, csv_file, train, base_path):
        self.csv_file = csv_file
        self.train = train
        self.all_image_names = self.csv_file['md5'].astype(str)
        self.all_image_ext = self.csv_file['file_ext'].astype(str)
        self.train_size = len(self.csv_file)
        self.base_path = base_path
        if self.train:
            print(f"Number of training images: {self.train_size}")

        # CLIP-style normalization constants.
        normalize = transforms.Normalize(
            mean=[0.48145466, 0.4578275, 0.40821073],
            std=[0.26862954, 0.26130258, 0.27577711],
        )
        # For extreme aspect ratios: resize the short side to 224 and take a
        # center crop, rather than distorting the image.
        self.thin_transform = transforms.Compose([
            transforms.Resize(224, interpolation=InterpolationMode.BICUBIC),
            transforms.CenterCrop(224),
            transforms.ToTensor(),
            normalize,
        ])
        # For roughly square images: resize directly to 224x224.
        self.normal_transform = transforms.Compose([
            transforms.Resize((224, 224), interpolation=InterpolationMode.BICUBIC),
            transforms.ToTensor(),
            normalize,
        ])

    def __len__(self):
        return len(self.all_image_names)

    def __getitem__(self, index):
        image_name = self.all_image_names[index]
        # file_ext is expected to contain the leading dot (e.g. ".jpg"),
        # since it is concatenated directly onto the md5 file name.
        path = os.path.join(self.base_path, image_name + self.all_image_ext[index])
        image = Image.open(path).convert("RGB")
        # Route very tall or very wide images through the crop-based transform.
        ratio = image.height / image.width
        if ratio > 2.0 or ratio < 0.5:
            image = self.thin_transform(image)
        else:
            image = self.normal_transform(image)

        return {
            'image': image,
            "image_name": image_name,
        }

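# A minimal sketch of exercising the dataset on its own (the CSV path,
# column values, and image folder below are hypothetical placeholders):
#
#     df = pd.read_csv("files.csv")  # columns: md5, file_ext
#     ds = ImageDataset(df, train=False, base_path="/images")
#     sample = ds[0]
#     assert sample["image"].shape == (3, 224, 224)
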
def prepare_model():
    # Load a fully pickled model object (not just a state_dict); on
    # PyTorch >= 2.6, torch.load defaults to weights_only=True, so a
    # pickled module needs weights_only=False and a trusted checkpoint.
    model = torch.load("path/to/your/model.pth", weights_only=False).to("cuda")
    # channels_last tends to speed up convolutions under mixed precision.
    model.to(memory_format=torch.channels_last)
    model = model.eval()
    return model

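# If only a state_dict were saved instead, the architecture would have to
# be constructed first; a hedged sketch (MyTagger is hypothetical):
#
#     model = MyTagger(num_tags=7704)
#     model.load_state_dict(torch.load("weights.pth", weights_only=True))
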
def tag_images(tagging_is_running, model, dataloader, output_queue):
    print('Begin tagging')
    model.eval()

    with torch.no_grad():
        for data in tqdm(dataloader, total=len(dataloader)):
            data, image_names = data['image'].to("cuda"), data["image_name"]
            # bfloat16 autocast: fast inference with no gradient scaling needed.
            with autocast(device_type='cuda', dtype=torch.bfloat16):
                outputs = model(data)

            # Multi-label head: an independent sigmoid per tag.
            probabilities = torch.sigmoid(outputs)
            output_queue.put((probabilities.to("cpu"), image_names))

    # Remove the sentinel so the writer process knows tagging is done.
    _ = tagging_is_running.get()
    print("Tagging finished!")

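# Consumer side of the producer/consumer pair: tag_images puts
# (probabilities, image_names) tuples on output_queue; tag_writer below
# drains them until the sentinel in tagging_is_running has been removed
# and the output queue is empty.
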
def tag_writer(tagging_is_running, output_queue, output_file_name):
    with open("tags.json", "r") as file:
        tags = json.load(file)
    allowed_tags = sorted(tags)
    del tags
    # Pad the tag list so its length matches the model's output dimension.
    allowed_tags.extend(["placeholder0", "placeholder1", "placeholder2"])
    tag_count = len(allowed_tags)
    assert tag_count == 7704, f"The tag list length is not correct. Expected: 7704, got: {tag_count}"

    with open(output_file_name, "w") as output_csv:
        writer = csv.writer(output_csv)
        writer.writerow(["image_name", "tags", "tag_probs"])
        # Keep writing while tagging is still running (sentinel present) or
        # batches remain in the queue.
        while tagging_is_running.qsize() > 0 or output_queue.qsize() > 0:
            tag_probabilities, image_names = output_queue.get()
            tag_probabilities = tag_probabilities.tolist()

            for per_image_tag_probabilities, image_name in zip(tag_probabilities, image_names, strict=True):
                this_image_tags = []
                this_image_tag_probabilities = []
                for index, per_tag_probability in enumerate(per_image_tag_probabilities):
                    if per_tag_probability > 0.3:
                        tag = allowed_tags[index]
                        if "placeholder" not in tag:
                            this_image_tags.append(tag)
                            # Probabilities are stored as integers in [0, 1000].
                            this_image_tag_probabilities.append(str(round(per_tag_probability * 1000)))
                image_row = [image_name, " ".join(this_image_tags), " ".join(this_image_tag_probabilities)]
                writer.writerow(image_row)

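# Example of a row tag_writer produces (md5, tags, and scores below are
# illustrative only):
#
#     d41d8cd98f00b204e9800998ecf8427e,blue_sky cloud outdoors,998 742 431
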
def set_seed(seed: int = 42) -> None:
    np.random.seed(seed)
    random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed(seed)

    # Note: this re-disables cudnn.benchmark, overriding the speed-oriented
    # flags set at import time; drop these two lines if raw throughput
    # matters more than bitwise reproducibility.
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False

    print(f"Random seed set as {seed}")

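# For fully reproducible DataLoader behavior one would also seed the
# workers (via worker_init_fn or a seeded torch.Generator); with
# shuffle=False and no random augmentation here, that is not strictly
# needed.
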
if __name__ == "__main__":
    output_file_name = "your_file.csv"
    set_seed()
    # CUDA tensors and models can only be shared across processes that were
    # started with the spawn method.
    multiprocessing.set_start_method('spawn')
    output_queue = multiprocessing.Queue()
    # Single-item sentinel queue: non-empty means tagging is still running.
    tagging_is_running = multiprocessing.Queue(maxsize=5)
    tagging_is_running.put("Running!")

    if not torch.cuda.is_available():
        raise RuntimeError("CUDA is not available!")

    model = prepare_model()  # already on CUDA and in eval mode
    batch_size = 128

    train_csv = pd.read_csv('/path/to/a/list/of/files/and/their/extensions.csv')

    train_data = ImageDataset(
        train_csv,
        train=True,
        base_path="/path/to/your/images",
    )

    train_loader = DataLoader(
        train_data,
        batch_size=batch_size,
        shuffle=False,
        num_workers=6,
        pin_memory=True,
    )

    # Run the writer and the tagger in separate processes so CSV writing
    # never blocks GPU inference.
    process_writer = multiprocessing.Process(
        target=tag_writer,
        args=(tagging_is_running, output_queue, output_file_name),
    )
    process_writer.start()
    process_tagger = multiprocessing.Process(
        target=tag_images,
        args=(tagging_is_running, model, train_loader, output_queue),
    )
    process_tagger.start()
    process_writer.join()
    process_tagger.join()