gyrojeff's picture
feat: add tanh
68dd12a
raw
history blame
4.27 kB
from font_dataset.fontlabel import FontLabel
from font_dataset.font import DSFont, load_font_with_exclusion
from . import config
import math
import os
import pickle
import torch
import torchvision.transforms as transforms
from typing import List, Dict, Tuple
from torch.utils.data import Dataset, DataLoader
from pytorch_lightning import LightningDataModule
from PIL import Image
class FontDataset(Dataset):
def __init__(self, path: str, config_path: str = "configs/font.yml", regression_use_tanh: bool=False):
self.path = path
self.fonts = load_font_with_exclusion(config_path)
self.regression_use_tanh = regression_use_tanh
self.images = [
os.path.join(path, f) for f in os.listdir(path) if f.endswith(".jpg")
]
self.images.sort()
def __len__(self):
return len(self.images)
def fontlabel2tensor(self, label: FontLabel, label_path) -> torch.Tensor:
out = torch.zeros(12, dtype=torch.float)
try:
out[0] = self.fonts[label.font.path]
except KeyError:
print(f"Unqualified font: {label.font.path}")
print(f"Label path: {label_path}")
raise KeyError
out[1] = 0 if label.text_direction == "ltr" else 1
# [0, 1]
out[2] = label.text_color[0] / 255.0
out[3] = label.text_color[1] / 255.0
out[4] = label.text_color[2] / 255.0
out[5] = label.text_size / label.image_width
out[6] = label.stroke_width / label.image_width
if label.stroke_color:
out[7] = label.stroke_color[0] / 255.0
out[8] = label.stroke_color[1] / 255.0
out[9] = label.stroke_color[2] / 255.0
else:
out[7:10] = out[2:5]
out[10] = label.line_spacing / label.image_width
out[11] = label.angle / 180.0 + 0.5
if self.regression_use_tanh:
out[2:12] = out[2:12] * 2 - 1
return out
def __getitem__(self, index: int) -> Tuple[torch.Tensor, torch.Tensor]:
# Load image
image_path = self.images[index]
image = Image.open(image_path).convert("RGB")
transform = transforms.Compose(
[
transforms.Resize((config.INPUT_SIZE, config.INPUT_SIZE)),
transforms.ToTensor(),
]
)
image = transform(image)
# Load label
label_path = image_path.replace(".jpg", ".bin")
with open(label_path, "rb") as f:
label: FontLabel = pickle.load(f)
# encode label
label = self.fontlabel2tensor(label, label_path)
return image, label
class FontDataModule(LightningDataModule):
def __init__(
self,
config_path: str = "configs/font.yml",
train_path: str = "./dataset/font_img/train",
val_path: str = "./dataset/font_img/val",
test_path: str = "./dataset/font_img/test",
train_shuffle: bool = True,
val_shuffle: bool = False,
test_shuffle: bool = False,
regression_use_tanh: bool = False,
**kwargs,
):
super().__init__()
self.dataloader_args = kwargs
self.train_shuffle = train_shuffle
self.val_shuffle = val_shuffle
self.test_shuffle = test_shuffle
self.train_dataset = FontDataset(train_path, config_path, regression_use_tanh)
self.val_dataset = FontDataset(val_path, config_path, regression_use_tanh)
self.test_dataset = FontDataset(test_path, config_path, regression_use_tanh)
def get_train_num_iter(self, num_device: int) -> int:
return math.ceil(
len(self.train_dataset) / (self.dataloader_args["batch_size"] * num_device)
)
def train_dataloader(self):
return DataLoader(
self.train_dataset,
shuffle=self.train_shuffle,
**self.dataloader_args,
)
def val_dataloader(self):
return DataLoader(
self.val_dataset,
shuffle=self.val_shuffle,
**self.dataloader_args,
)
def test_dataloader(self):
return DataLoader(
self.test_dataset,
shuffle=self.test_shuffle,
**self.dataloader_args,
)