# style_transfer / app.py
# vidscbvsdCHEN's picture
# Update app.py
# f4a69a4 verified
import os
import numpy as np
import tensorflow as tf
from tqdm import tqdm
import gradio as gr
import typing
from huggingface_hub import HfApi, Repository
import tempfile
# Define the model and helper functions.
print("Importing necessary libraries and defining functions...")

# VGG19 layer name -> weight of that layer inside the content loss.
CONTENT_LAYERS = {'block4_conv2': 0.5, 'block5_conv2': 0.5}
# VGG19 layer name -> weight of that layer inside the style loss.
STYLE_LAYERS = {'block1_conv1': 0.2, 'block2_conv1': 0.2, 'block3_conv1': 0.2, 'block4_conv1': 0.2, 'block5_conv1': 0.2}

# Multipliers applied to the content and style terms when they are combined.
CONTENT_LOSS_FACTOR = 1
STYLE_LOSS_FACTOR = 100

# Working resolution (in pixels) that every image is resized to.
WIDTH = 450
HEIGHT = 300

# Optimisation schedule: EPOCHS * STEPS_PER_EPOCH gradient steps in total.
EPOCHS = 20
STEPS_PER_EPOCH = 100
LEARNING_RATE = 0.03

# Per-channel (RGB) ImageNet statistics for pixel values scaled to [0, 1].
image_mean = tf.constant([0.485, 0.456, 0.406])
# NOTE: the red-channel standard deviation is 0.229 (not 0.299, a common
# transposed-digit typo of the ImageNet statistics).
image_std = tf.constant([0.229, 0.224, 0.225])
def normalization(x):
    """Standardise *x* with the module-level ``image_mean`` / ``image_std``.

    Args:
        x: Image tensor; assumed to carry its 3 colour channels on the last
            axis so the constants broadcast (TODO confirm against callers).

    Returns:
        ``(x - image_mean) / image_std``.
    """
    centered = x - image_mean
    return centered / image_std
def load_images(image_path, width=WIDTH, height=HEIGHT):
    """Read a JPEG file and turn it into a normalised batch of one image.

    Args:
        image_path: Path of the JPEG file to read.
        width: Target width in pixels.
        height: Target height in pixels.

    Returns:
        A tensor reshaped to ``[1, height, width, 3]`` whose values were
        scaled by 1/255 and then passed through ``normalization``.
    """
    raw_bytes = tf.io.read_file(image_path)
    # Force three channels regardless of how the JPEG was stored.
    pixels = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(pixels, [height, width])
    standardised = normalization(resized / 255.)
    # Add the leading batch axis.
    return tf.reshape(standardised, [1, height, width, 3])
def save_image(image, filename):
    """Undo the normalisation of *image* and write it to *filename* as JPEG.

    Args:
        image: Tensor (or variable) with a leading axis that is dropped by
            reshaping to ``image.shape[1:]``.
        filename: Destination path of the encoded JPEG.
    """
    # Drop the leading axis by reshaping to the remaining dimensions.
    pixels = tf.reshape(image, image.shape[1:])
    # Invert ``normalization`` and scale back up by 255.
    pixels = (pixels * image_std + image_mean) * 255.
    # Convert to integers first, then clamp to the valid 0..255 range.
    pixels = tf.clip_by_value(tf.cast(pixels, tf.int32), 0, 255)
    encoded = tf.image.encode_jpeg(tf.cast(pixels, tf.uint8))
    tf.io.write_file(filename, encoded)
def get_vgg19_model(layers):
    """Build a frozen VGG19 model exposing the outputs of the named layers.

    Args:
        layers: Iterable of VGG19 layer names whose outputs are wanted.

    Returns:
        A ``tf.keras.Model`` (``trainable = False``) that maps the VGG19 input
        to a list with one output per requested layer, in the given order.
    """
    # ImageNet weights, without the classification head.
    backbone = tf.keras.applications.VGG19(include_top=False, weights='imagenet')
    feature_maps = [backbone.get_layer(layer_name).output for layer_name in layers]
    extractor = tf.keras.Model([backbone.input, ], feature_maps)
    extractor.trainable = False
    return extractor
class NeuralStyleTransferModel(tf.keras.Model):
    """Keras model that returns weighted VGG19 features for style transfer.

    The model wraps the extractor built by ``get_vgg19_model`` and groups its
    outputs into ``'content'`` and ``'style'`` lists of
    ``(feature, weight)`` pairs.
    """

    def __init__(self, content_layers=CONTENT_LAYERS, style_layers=STYLE_LAYERS):
        """Create the extractor for the requested layers.

        Args:
            content_layers: Mapping of layer name -> content-loss weight.
            style_layers: Mapping of layer name -> style-loss weight.
        """
        super().__init__()
        self.content_layers = content_layers
        self.style_layers = style_layers
        # Content layers come first, then style layers; the index map records
        # where each layer's output sits in the extractor's output list.
        layers = [*self.content_layers.keys(), *self.style_layers.keys()]
        self.outputs_index_map = {name: position for position, name in enumerate(layers)}
        self.vgg = get_vgg19_model(layers)

    def call(self, inputs, training=None, mask=None):
        """Run the extractor and pair every feature with its layer weight.

        Args:
            inputs: Input forwarded unchanged to the VGG19 extractor.
            training: Unused.
            mask: Unused.

        Returns:
            ``{'content': [...], 'style': [...]}`` where every list entry is
            ``(feature, weight)``; ``feature`` is index 0 along the leading
            axis of the layer output (presumably the single batch element).
        """
        outputs = self.vgg(inputs)

        def weighted_features(weighted_layers):
            # One (feature, weight) pair per configured layer, in dict order.
            return [
                (outputs[self.outputs_index_map[name]][0], weight)
                for name, weight in weighted_layers.items()
            ]

        return {
            'content': weighted_features(self.content_layers),
            'style': weighted_features(self.style_layers),
        }
def _compute_content_loss(noise_features, target_features):
    """Return the scaled sum of squared differences between two features.

    The sum is divided by ``2 * WIDTH * HEIGHT * 3``, i.e. the scale is
    derived from the module-level image size, not from the feature shape.
    """
    squared_error = tf.square(noise_features - target_features)
    scale = 2. * WIDTH * HEIGHT * 3
    return tf.reduce_sum(squared_error) / scale
def compute_content_loss(noise_content_features, target_content_features):
    """Sum the weighted per-layer content losses.

    Args:
        noise_content_features: Sequence of ``(feature, weight)`` pairs for
            the image being optimised.
        target_content_features: Sequence of ``(feature, weight)`` pairs for
            the content image; only the features are used.

    Returns:
        Scalar tensor: the sum of every layer loss times the weight taken
        from ``noise_content_features``.
    """
    weighted_losses = [
        _compute_content_loss(noise_feature, target_feature) * factor
        for (noise_feature, factor), (target_feature, _) in zip(
            noise_content_features, target_content_features
        )
    ]
    return tf.reduce_sum(weighted_losses)
def gram_matrix(feature):
    """Return the Gram matrix of a rank-3 feature tensor.

    The last axis is moved to the front, the remaining two axes are
    flattened, and the resulting 2-D matrix is multiplied by its transpose,
    giving a square matrix whose side is the size of the input's last axis.
    """
    last_axis_first = tf.transpose(feature, perm=[2, 0, 1])
    flattened = tf.reshape(last_axis_first, (last_axis_first.shape[0], -1))
    return tf.matmul(flattened, tf.transpose(flattened))
def _compute_style_loss(noise_feature, target_feature):
    """Return the scaled squared distance between two Gram matrices.

    The sum of squared differences is divided by
    ``4 * (WIDTH * HEIGHT)**2 * 3**2``, a scale derived from the module-level
    image size rather than from the feature shape.
    """
    gram_difference = gram_matrix(noise_feature) - gram_matrix(target_feature)
    scale = 4. * (WIDTH * HEIGHT) ** 2 * 3 ** 2
    return tf.reduce_sum(tf.square(gram_difference)) / scale
def compute_style_loss(noise_style_features, target_style_features):
    """Sum the weighted per-layer style losses.

    Args:
        noise_style_features: Sequence of ``(feature, weight)`` pairs for the
            image being optimised.
        target_style_features: Sequence of ``(feature, weight)`` pairs for
            the style image; only the features are used.

    Returns:
        Scalar tensor: the sum of every layer loss times the weight taken
        from ``noise_style_features``.
    """
    weighted_losses = [
        _compute_style_loss(noise_feature, target_feature) * factor
        for (noise_feature, factor), (target_feature, _) in zip(
            noise_style_features, target_style_features
        )
    ]
    return tf.reduce_sum(weighted_losses)
def total_loss(noise_features, target_content_features, target_style_features):
    """Combine content and style loss into the objective being minimised.

    Args:
        noise_features: Dict with ``'content'`` and ``'style'`` entries, as
            returned by ``NeuralStyleTransferModel``.
        target_content_features: Content features of the content image.
        target_style_features: Style features of the style image.

    Returns:
        ``content_loss * CONTENT_LOSS_FACTOR + style_loss * STYLE_LOSS_FACTOR``.
    """
    content_term = compute_content_loss(noise_features['content'], target_content_features)
    style_term = compute_style_loss(noise_features['style'], target_style_features)
    return content_term * CONTENT_LOSS_FACTOR + style_term * STYLE_LOSS_FACTOR
# Module-level Adam optimizer configured with LEARNING_RATE.
optimizer = tf.keras.optimizers.Adam(learning_rate=LEARNING_RATE)
# Single feature-extraction model, built once at import time with the default
# content/style layer configuration.
model = NeuralStyleTransferModel()
def neural_style_transfer(content_image_path, style_image_path):
    """Run style transfer and return the path of the resulting JPEG.

    Args:
        content_image_path: Path of the JPEG providing the content.
        style_image_path: Path of the JPEG providing the style.

    Returns:
        Path of a newly created temporary ``.jpg`` file holding the result.
        The caller owns the file and is responsible for deleting it.
    """
    content_image = load_images(content_image_path)
    style_image = load_images(style_image_path)

    # Targets are fixed for the whole optimisation, so compute them once.
    target_content_features = model([content_image, ])['content']
    target_style_features = model([style_image, ])['style']

    # Start from the content image blended with uniform noise. The noise is
    # cast to float32 explicitly so it matches the image tensor's dtype.
    noise = np.random.uniform(-0.2, 0.2, (1, HEIGHT, WIDTH, 3)).astype(np.float32)
    noise_image = tf.Variable((content_image + noise) / 2)

    # A fresh optimizer per call: Adam keeps per-variable moment estimates, so
    # sharing one optimizer across requests would leak state from the previous
    # image and fails outright on optimizers that only accept the variables
    # they were first built with.
    step_optimizer = tf.keras.optimizers.Adam(LEARNING_RATE)

    @tf.function
    def train_one_step():
        with tf.GradientTape() as tape:
            noise_outputs = model(noise_image)
            loss = total_loss(noise_outputs, target_content_features, target_style_features)
        grad = tape.gradient(loss, noise_image)
        step_optimizer.apply_gradients([(grad, noise_image)])
        return loss

    # EPOCHS * STEPS_PER_EPOCH gradient steps in total.
    for _ in range(EPOCHS * STEPS_PER_EPOCH):
        train_one_step()

    # mkstemp creates the file atomically (mktemp is deprecated and racy);
    # close the descriptor because save_image writes by path.
    fd, output_image_path = tempfile.mkstemp(suffix='.jpg')
    os.close(fd)
    save_image(noise_image, output_image_path)
    return output_image_path
def transfer_style(content_image, style_image):
    """Gradio callback: stylise *content_image* using *style_image*.

    Args:
        content_image: PIL image providing the content.
        style_image: PIL image providing the style.

    Returns:
        Path of the JPEG file produced by ``neural_style_transfer``.

    Raises:
        ValueError: If either image is missing (``None``).
    """
    if content_image is None or style_image is None:
        raise ValueError("Both a content image and a style image are required.")

    # The intermediate JPEGs live in a private directory that is removed as
    # soon as the transfer finishes, so uploads do not pile up in the temp dir.
    with tempfile.TemporaryDirectory() as workdir:
        content_image_path = os.path.join(workdir, 'content.jpg')
        style_image_path = os.path.join(workdir, 'style.jpg')
        # JPEG cannot store alpha/palette modes; normalise to RGB first so
        # PNG uploads with transparency do not fail on save.
        content_image.convert('RGB').save(content_image_path)
        style_image.convert('RGB').save(style_image_path)
        return neural_style_transfer(content_image_path, style_image_path)
# Create the Gradio interface.
# ``gr.inputs`` / ``gr.outputs`` were deprecated in Gradio 3 and removed in
# Gradio 4, so use the unified ``gr.Image`` component when it exists and fall
# back to the legacy namespaces only on older installations.
if hasattr(gr, "Image"):
    _image_inputs = [
        gr.Image(type="pil", label="Content Image"),
        gr.Image(type="pil", label="Style Image"),
    ]
    _image_output = gr.Image(type="filepath", label="Styled Image")
else:
    _image_inputs = [
        gr.inputs.Image(type="pil", label="Content Image"),
        gr.inputs.Image(type="pil", label="Style Image"),
    ]
    _image_output = gr.outputs.Image(type="file", label="Styled Image")

iface = gr.Interface(
    fn=transfer_style,
    inputs=_image_inputs,
    outputs=_image_output,
    title="Neural Style Transfer",
    description="Upload a content image and a style image to perform neural style transfer.",
)
# Launch the Gradio app.
iface.launch()