Spaces:
Sleeping
Sleeping
File size: 9,832 Bytes
e721a5b 77a62ab e721a5b 77a62ab e721a5b |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 |
# -*- coding: utf-8 -*-
import os
import numpy as np
from tqdm import tqdm
import tensorflow as tf
import cv2
import argparse
import typing
import h5py
# 解析命令行参数
def parse_opt(known=False):
    """Build the command-line parser and return the parsed options.

    Args:
        known: When truthy, parse with ``parse_known_args`` and keep only the
            namespace, so unrecognised arguments are tolerated. Otherwise
            parse with ``parse_args``.

    Returns:
        The ``argparse.Namespace`` holding the option values.
    """
    # (flag, type, default, help) in the order the options are registered.
    option_specs = (
        ("--content_img_path", str, "./images/1.jpg", "原图路径"),
        ("--style_img_path", str, "./images/style.jpg", "风格图片路径"),
        ("--output_path", str, "./output/1", "生成图片保存路径"),
        ("--epochs", int, 20, "总训练轮数"),
        ("--step_per_epoch", int, 100, "每轮训练次数"),
        ("--learning_rate", float, 0.01, "学习率"),
        ("--content_loss_factor", float, 1.0, "内容损失总加权系数"),
        ("--style_loss_factor", float, 100.0, "风格损失总加权系数"),
        ("--img_size", int, 0, "图片尺寸,0代表不设置使用默认尺寸(450*300),输入1代表使用图片尺寸,其他输入代表使用自定义尺寸"),
        ("--img_width", int, 450, "自定义图片宽度"),
        ("--img_height", int, 300, "自定义图片高度"),
    )

    parser = argparse.ArgumentParser()
    for flag, value_type, default_value, help_text in option_specs:
        parser.add_argument(flag, type=value_type, default=default_value, help=help_text)

    if known:
        namespace, _unrecognised = parser.parse_known_args()
        return namespace
    return parser.parse_args()
def load_images(image_path, width, height):
    """Read an image file and return it as a normalised batch-of-one tensor.

    Args:
        image_path: Path of the file, read with ``tf.io.read_file`` and
            decoded with ``tf.image.decode_jpeg`` into 3 channels.
        width: Target width used for the resize.
        height: Target height used for the resize.

    Returns:
        A tensor reshaped to ``[1, height, width, 3]``.
    """
    batch_shape = [1, height, width, 3]
    raw_bytes = tf.io.read_file(image_path)
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    pixels = tf.image.resize(pixels, [height, width])
    # Scale by 1/255 first, then standardise with the module-level statistics.
    standardised = normalization(pixels / 255.0)
    return tf.reshape(standardised, batch_shape)
def load_images_from_list(image_array, width, height):
    """Convert in-memory pixel data into a normalised batch-of-one tensor.

    Args:
        image_array: Pixel data (e.g. a numpy array) that
            ``tf.convert_to_tensor`` can turn into a float32 tensor.
        width: Target width used for the resize.
        height: Target height used for the resize.

    Returns:
        A tensor reshaped to ``[1, height, width, 3]``.
    """
    batch_shape = [1, height, width, 3]
    pixels = tf.convert_to_tensor(image_array, dtype=tf.float32)
    pixels = tf.image.resize(pixels, [height, width])
    # Scale by 1/255 first, then standardise with the module-level statistics.
    standardised = normalization(pixels / 255.0)
    return tf.reshape(standardised, batch_shape)
def save_image(image, filename):
    """De-normalise a batched image tensor and write it to disk as JPEG.

    Args:
        image: Normalised image tensor with a leading batch axis; it is
            reshaped to ``image.shape[1:]``.
        filename: Destination path handed to ``tf.io.write_file``.
    """
    # Drop the leading batch axis by reshaping to the remaining dimensions.
    pixels = tf.reshape(image, image.shape[1:])
    # Invert normalization() and scale back up by 255.
    pixels = (pixels * image_std + image_mean) * 255.0
    # Cast to integers first, then clamp into the 0..255 range.
    clamped = tf.clip_by_value(tf.cast(pixels, tf.int32), 0, 255)
    encoded = tf.image.encode_jpeg(tf.cast(clamped, tf.uint8))
    tf.io.write_file(filename, encoded)
def save_image_for_gradio(image):
    """De-normalise a batched image tensor and return it as a numpy array.

    Args:
        image: Normalised image tensor with a leading batch axis; it is
            reshaped to ``image.shape[1:]``.

    Returns:
        The uint8 pixel data obtained by calling ``.numpy()`` on the result.
    """
    # Drop the leading batch axis by reshaping to the remaining dimensions.
    pixels = tf.reshape(image, image.shape[1:])
    # Invert normalization() and scale back up by 255.
    pixels = (pixels * image_std + image_mean) * 255.0
    # Cast to integers first, then clamp into the 0..255 range.
    clamped = tf.clip_by_value(tf.cast(pixels, tf.int32), 0, 255)
    # Convert the TensorFlow tensor into a numpy array for the caller.
    return tf.cast(clamped, tf.uint8).numpy()
def get_vgg19_model(layers):
    """Create a frozen VGG19 model that outputs the requested layers.

    Args:
        layers: Iterable of VGG19 layer names whose outputs should be exposed,
            in the order they should appear in the model's outputs.

    Returns:
        A ``tf.keras.Model`` built on VGG19 (ImageNet weights, no top) with
        ``trainable`` set to ``False``.
    """
    backbone = tf.keras.applications.VGG19(weights="imagenet", include_top=False)
    feature_maps = []
    for layer_name in layers:
        feature_maps.append(backbone.get_layer(layer_name).output)
    extractor = tf.keras.Model(backbone.input, feature_maps)
    extractor.trainable = False
    return extractor
class NeuralStyleTransferModel(tf.keras.Model):
    """Keras model returning weighted content and style features from VGG19."""

    def __init__(self, content_layers: typing.Dict[str, float], style_layers: typing.Dict[str, float]):
        """Store the layer weightings and build the VGG19 feature extractor.

        Args:
            content_layers: Mapping of content layer name to loss weight.
            style_layers: Mapping of style layer name to loss weight.
        """
        super().__init__()
        self.content_layers = content_layers
        self.style_layers = style_layers
        # Content layers come first, then style layers; the position of each
        # name is its index into the extractor's outputs.
        layer_names = list(self.content_layers) + list(self.style_layers)
        self.outputs_index_map = {name: position for position, name in enumerate(layer_names)}
        self.vgg = get_vgg19_model(layer_names)

    def call(self, inputs, training=None, mask=None):
        """Run the extractor and group its outputs by purpose.

        Returns:
            A dict with keys ``"content"`` and ``"style"``; each value is a
            list of ``(feature, weight)`` tuples, where ``feature`` is the
            first element along the leading axis of that layer's output.
        """
        features = self.vgg(inputs)

        def weighted_features(layer_weights):
            # Pair each layer's first-sample feature map with its loss weight.
            return [
                (features[self.outputs_index_map[name]][0], weight)
                for name, weight in layer_weights.items()
            ]

        return {
            "content": weighted_features(self.content_layers),
            "style": weighted_features(self.style_layers),
        }
def normalization(x):
    """Standardise ``x`` using the module-level ``image_mean`` and ``image_std``.

    Returns:
        ``(x - image_mean) / image_std``.
    """
    centered = x - image_mean
    return centered / image_std
def _compute_content_loss(noise_features, target_features):
    """Return the content loss between two feature tensors of one layer.

    The loss is the sum of squared differences divided by ``2 * M * N``,
    where ``M`` and ``N`` are module-level globals.
    """
    squared_error = tf.square(noise_features - target_features)
    denominator = 2.0 * M * N
    return tf.reduce_sum(squared_error) / denominator
def compute_content_loss(noise_content_features, target_content_features):
    """Return the weighted content loss summed over all content layers.

    Args:
        noise_content_features: ``(feature, weight)`` pairs for the image
            being optimised; the weight of each pair scales that layer's loss.
        target_content_features: ``(feature, weight)`` pairs for the target;
            only the feature of each pair is used.
    """
    weighted_losses = [
        _compute_content_loss(noise_feature, target_feature) * factor
        for (noise_feature, factor), (target_feature, _) in zip(noise_content_features, target_content_features)
    ]
    return tf.reduce_sum(weighted_losses)
def gram_matrix(feature):
    """Return the Gram matrix of a rank-3 feature tensor.

    The last axis is moved to the front, the remaining two axes are flattened,
    and the resulting matrix is multiplied by its own transpose.
    """
    channels_first = tf.transpose(feature, perm=[2, 0, 1])
    channel_count = channels_first.shape[0]
    flattened = tf.reshape(channels_first, (channel_count, -1))
    return tf.matmul(flattened, tf.transpose(flattened))
def _compute_style_loss(noise_feature, target_feature):
    """Return the style loss between two feature tensors of one layer.

    The loss is the sum of squared differences between the two Gram matrices
    divided by ``4 * M**2 * N**2``, where ``M`` and ``N`` are module-level
    globals.
    """
    gram_difference = gram_matrix(noise_feature) - gram_matrix(target_feature)
    denominator = 4.0 * (M**2) * (N**2)
    return tf.reduce_sum(tf.square(gram_difference)) / denominator
def compute_style_loss(noise_style_features, target_style_features):
    """Return the weighted style loss summed over all style layers.

    Args:
        noise_style_features: ``(feature, weight)`` pairs for the image being
            optimised; the weight of each pair scales that layer's loss.
        target_style_features: ``(feature, weight)`` pairs for the target;
            only the feature of each pair is used.
    """
    weighted_losses = [
        _compute_style_loss(noise_feature, target_feature) * factor
        for (noise_feature, factor), (target_feature, _) in zip(noise_style_features, target_style_features)
    ]
    return tf.reduce_sum(weighted_losses)
def total_loss(noise_features, target_content_features, target_style_features):
    """Return the combined content and style loss.

    Args:
        noise_features: Dict with ``"content"`` and ``"style"`` feature lists
            for the image being optimised.
        target_content_features: Content feature list of the target.
        target_style_features: Style feature list of the target.

    Returns:
        The content loss scaled by ``CONTENT_LOSS_FACTOR`` plus the style loss
        scaled by ``STYLE_LOSS_FACTOR`` (both module-level globals).
    """
    content_term = compute_content_loss(noise_features["content"], target_content_features)
    style_term = compute_style_loss(noise_features["style"], target_style_features)
    weighted_content = content_term * CONTENT_LOSS_FACTOR
    weighted_style = style_term * STYLE_LOSS_FACTOR
    return weighted_content + weighted_style
@tf.function
def train_one_step(model, noise_image, optimizer, target_content_features, target_style_features):
    """Perform one optimisation step on the image and return the loss.

    Args:
        model: Callable returning the ``"content"``/``"style"`` feature dict.
        noise_image: The variable being optimised; gradients are taken with
            respect to it and applied to it.
        optimizer: Optimizer whose ``apply_gradients`` updates ``noise_image``.
        target_content_features: Content feature list of the target.
        target_style_features: Style feature list of the target.

    Returns:
        The loss computed before the update is applied.
    """
    with tf.GradientTape() as tape:
        current_features = model(noise_image)
        loss = total_loss(current_features, target_content_features, target_style_features)
    # The image itself is the only thing being updated.
    gradient = tape.gradient(loss, noise_image)
    gradient_pairs = [(gradient, noise_image)]
    optimizer.apply_gradients(gradient_pairs)
    return loss
def main(content_img, style_img, epochs, step_per_epoch, learning_rate, content_loss_factor, style_loss_factor, img_size, img_width, img_height):
    """Run neural style transfer and return the stylised image.

    Args:
        content_img: Content image, given either as pixel data (anything
            ``load_images_from_list`` accepts, e.g. a numpy array) or as a
            path to an image file (loaded with ``load_images``).
        style_img: Style image, in either of the same two forms.
        epochs: Number of training epochs.
        step_per_epoch: Number of optimisation steps per epoch.
        learning_rate: Learning rate passed to the Adam optimizer.
        content_loss_factor: Overall weight of the content loss.
        style_loss_factor: Overall weight of the style loss.
        img_size: The string ``"default size"`` selects 450x300; any other
            value selects ``img_width`` x ``img_height``.
        img_width: Working width used when ``img_size`` is not
            ``"default size"``.
        img_height: Working height used when ``img_size`` is not
            ``"default size"``.

    Returns:
        The result of ``save_image_for_gradio`` on the optimised image
        (a uint8 numpy array).
    """
    global CONTENT_LOSS_FACTOR, STYLE_LOSS_FACTOR, CONTENT_IMAGE_PATH, STYLE_IMAGE_PATH, OUTPUT_DIR, EPOCHS, LEARNING_RATE, STEPS_PER_EPOCH, M, N, image_mean, image_std, IMG_WIDTH, IMG_HEIGHT

    def _load(source):
        # The command-line entry point passes file paths while other callers
        # may pass pixel arrays; pick the loader that matches the input.
        if isinstance(source, (str, bytes, os.PathLike)):
            return load_images(os.fspath(source), IMG_WIDTH, IMG_HEIGHT)
        return load_images_from_list(source, IMG_WIDTH, IMG_HEIGHT)

    # NOTE(review): '/cuda:0' is not in TensorFlow's usual '/GPU:0' device-name
    # form — confirm the installed TensorFlow version accepts it.
    with tf.device('/cuda:0'):
        CONTENT_LOSS_FACTOR = content_loss_factor
        STYLE_LOSS_FACTOR = style_loss_factor
        CONTENT_IMAGE_PATH = content_img
        STYLE_IMAGE_PATH = style_img
        EPOCHS = epochs
        LEARNING_RATE = learning_rate
        STEPS_PER_EPOCH = step_per_epoch

        # Content feature layers and their loss weights.
        CONTENT_LAYERS = {"block4_conv2": 0.5, "block5_conv2": 0.5}
        # Style feature layers and their loss weights.
        STYLE_LAYERS = {
            "block1_conv1": 0.2,
            "block2_conv1": 0.2,
            "block3_conv1": 0.2,
            "block4_conv1": 0.2,
            "block5_conv1": 0.2,
        }

        # NOTE(review): the command line documents integer img_size values
        # (0 / 1 / other), but only the string "default size" is recognised
        # here; every other value uses img_width / img_height.
        if img_size == "default size":
            IMG_WIDTH = 450
            IMG_HEIGHT = 300
        else:
            IMG_WIDTH = img_width
            IMG_HEIGHT = img_height
        print("IMG_WIDTH:", IMG_WIDTH)
        print("IMG_HEIGHT:", IMG_HEIGHT)

        # The network uses weights pre-trained on ImageNet, so inputs are
        # normalised with the ImageNet per-channel mean and standard deviation.
        # The red-channel std is 0.229 (it was previously mistyped as 0.299).
        image_mean = tf.constant([0.485, 0.456, 0.406])
        image_std = tf.constant([0.229, 0.224, 0.225])

        model = NeuralStyleTransferModel(CONTENT_LAYERS, STYLE_LAYERS)

        content_image = _load(CONTENT_IMAGE_PATH)
        style_image = _load(STYLE_IMAGE_PATH)

        # The targets are computed once; only the generated image changes.
        target_content_features = model(content_image)["content"]
        target_style_features = model(style_image)["style"]

        # M and N scale the denominators of the per-layer content/style losses.
        M = IMG_WIDTH * IMG_HEIGHT
        N = 3

        optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

        # Start from the content image blended with uniform noise.
        noise_image = tf.Variable((content_image[0] + np.random.uniform(-0.2, 0.2, (1, IMG_HEIGHT, IMG_WIDTH, 3))) / 2)

        for epoch in range(EPOCHS):
            with tqdm(total=STEPS_PER_EPOCH, desc="Epoch {}/{}".format(epoch + 1, EPOCHS)) as pbar:
                for step in range(STEPS_PER_EPOCH):
                    _loss = train_one_step(model, noise_image, optimizer, target_content_features, target_style_features)
                    pbar.set_postfix({"loss": "%.4f" % float(_loss)})
                    pbar.update(1)

        return save_image_for_gradio(noise_image)
if __name__ == "__main__":
    # Show which GPUs TensorFlow can see before starting.
    print(tf.config.list_physical_devices('GPU'))
    opt = parse_opt()
    # NOTE(review): opt.output_path is parsed but not used here, and the image
    # returned by main() is discarded — confirm whether it should be saved.
    main(
        content_img=opt.content_img_path,
        style_img=opt.style_img_path,
        epochs=opt.epochs,
        step_per_epoch=opt.step_per_epoch,
        learning_rate=opt.learning_rate,
        content_loss_factor=opt.content_loss_factor,
        style_loss_factor=opt.style_loss_factor,
        img_size=opt.img_size,
        img_width=opt.img_width,
        img_height=opt.img_height,
    )
|