Glyph-SDXL-v2

Paused

App Files Files Community

Glyph-SDXL-v2 / app.py

GlyphByT5

fix: add validity check for multilingual

64485ac verified 8 months ago

raw

history blame contribute delete

56.3 kB

	import gc
	import json
	import webcolors
	import spaces
	import gradio as gr
	import os.path as osp
	from copy import deepcopy
	from PIL import Image, ImageDraw, ImageFont

	import torch
	from diffusers import UNet2DConditionModel, AutoencoderKL
	from diffusers.models.attention import BasicTransformerBlock
	from peft import LoraConfig
	from peft.utils import set_peft_model_state_dict
	from transformers import PretrainedConfig

	from diffusers import DPMSolverMultistepScheduler

	from glyph_sdxl.utils import (
	parse_config,
	UNET_CKPT_NAME,
	huggingface_cache_dir,
	load_byt5_and_byt5_tokenizer,
	BYT5_MAPPER_CKPT_NAME,
	INSERTED_ATTN_CKPT_NAME,
	BYT5_CKPT_NAME,
	PromptFormat,
	MultilingualPromptFormat,
	)
	from glyph_sdxl.custom_diffusers import (
	StableDiffusionGlyphXLPipeline,
	CrossAttnInsertBasicTransformerBlock,
	)
	from glyph_sdxl.modules import T5EncoderBlockByT5Mapper
	from demo.constants import MAX_TEXT_BOX


	# Click bookkeeping for the default sketch canvas.  `state` is 0 when the next
	# click starts a new box and 1 when it completes the pending one (see
	# get_pixels).  `stack` holds one entry per box: a 2-element click position
	# while the box is pending, or [x, y, x2, y2] once both corners are known.
	state = 0
	stack = []
	# Same bookkeeping for the multilingual sketch canvas.
	multilingual_state = 0
	multilingual_stack = []
	# Font used to draw the box index labels onto the sketch canvas.
	font = ImageFont.truetype("assets/Arial.ttf", 20)

	device = "cuda"
	# Both pipelines are populated by init_pipeline().
	pipeline = None
	pipeline_multilingual = None
	prompt_format = PromptFormat()
	multilingual_prompt_format = MultilingualPromptFormat()

	# Language code -> display name shown in the font drop-down.
	multilingual_code_dict = {
	    'cn': 'Chinese',
	    'en': 'English',
	    'fr': 'French',
	    'de': 'German',
	    'es': 'Spanish',
	    'it': 'Italian',
	    'pt': 'Portuguese',
	    'ru': 'Russian',
	    'jp': 'Japanese',
	    'kr': 'Korean',
	}
	# Display name -> code used as the prefix of the font-family key in
	# generate_image_multilingual.
	# NOTE(review): every language other than Chinese/Japanese/Korean maps to
	# 'en' -- presumably those languages share the English font set; confirm.
	multilingual_reverse_code_dict = {
	    'Chinese': 'cn',
	    'English': 'en',
	    'French': 'en',
	    'German': 'en',
	    'Spanish': 'en',
	    'Italian': 'en',
	    'Portuguese': 'en',
	    'Russian': 'en',
	    'Japanese': 'jp',
	    'Korean': 'kr',
	}
	# Language code -> parsed content of assets/multi_fonts/<code>.json.
	multilingual_font_dict = {}
	multilingual_meta_path = 'assets/multi_fonts'

	# Load one font list per supported language at import time.
	for code in multilingual_code_dict:
	    with open(osp.join(multilingual_meta_path, f"{code}.json"), 'r') as f:
	        lang_font_list = json.load(f)
	        multilingual_font_dict[code] = lang_font_list


	def flush():
	    """Run a garbage-collection pass, then ask torch to empty its CUDA cache."""
	    gc.collect()
	    torch.cuda.empty_cache()

	def import_model_class_from_model_name_or_path(
	    pretrained_model_name_or_path: str, revision: str, subfolder: str = "text_encoder",
	):
	    """Return the transformers text-encoder class named by a checkpoint's config.

	    The config found under ``subfolder`` is loaded and its first entry in
	    ``architectures`` selects the class.

	    Returns:
	        ``CLIPTextModel`` or ``CLIPTextModelWithProjection``.

	    Raises:
	        ValueError: if the config names any other architecture.
	    """
	    architecture = PretrainedConfig.from_pretrained(
	        pretrained_model_name_or_path,
	        subfolder=subfolder,
	        revision=revision,
	    ).architectures[0]

	    if architecture not in ("CLIPTextModel", "CLIPTextModelWithProjection"):
	        raise ValueError(f"{architecture} is not supported.")

	    # Each class is imported only in the branch that returns it.
	    if architecture == "CLIPTextModel":
	        from transformers import CLIPTextModel

	        return CLIPTextModel

	    from transformers import CLIPTextModelWithProjection

	    return CLIPTextModelWithProjection

	def _insert_glyph_cross_attn_blocks(unet, block_names, glyph_channels, dtype):
	    """Swap the named transformer blocks of ``unet`` for glyph cross-attention blocks.

	    Every ``BasicTransformerBlock`` whose qualified module name is listed in
	    ``block_names`` is replaced in place by a
	    ``CrossAttnInsertBasicTransformerBlock`` built from it with
	    ``glyph_channels``.  The new block is frozen except for its inserted
	    modules, and its original sub-modules are cast to ``dtype``.
	    """
	    inserted_para_keys = set()
	    for name, module in unet.named_modules():
	        if not (isinstance(module, BasicTransformerBlock) and name in block_names):
	            continue
	        # Walk down to the direct parent so the child can be re-registered.
	        parent_module = unet
	        for n in name.split(".")[:-1]:
	            parent_module = getattr(parent_module, n)
	        new_block = CrossAttnInsertBasicTransformerBlock.from_transformer_block(
	            module,
	            glyph_channels,
	        )
	        new_block.requires_grad_(False)
	        for inserted_module_name, inserted_module in zip(
	            new_block.get_inserted_modules_names(),
	            new_block.get_inserted_modules()
	        ):
	            inserted_module.requires_grad_(True)
	            for para_name, para in inserted_module.named_parameters():
	                para_key = name + '.' + inserted_module_name + '.' + para_name
	                # Each inserted parameter must be registered exactly once.
	                assert para_key not in inserted_para_keys
	                inserted_para_keys.add(para_key)
	        for origin_module in new_block.get_origin_modules():
	            origin_module.to(dtype=dtype)
	        parent_module.register_module(name.split(".")[-1], new_block)
	        print(f"inserted cross attn block to {name}")


	def init_pipeline():
	    """Build both Glyph-SDXL pipelines and store them in the module globals.

	    Loads the text encoders, VAE, UNets, ByT5 encoders and mappers, patches
	    the UNets with the glyph cross-attention blocks and LoRA adapters, loads
	    the fine-tuned checkpoints from ``checkpoints/``, and moves everything to
	    ``device``.  Sets ``pipeline`` and ``pipeline_multilingual``.

	    The multilingual branch now consistently reads ``config_multilingual`` and
	    ``byt5_model_multilingual``; the original used the English config revision
	    for the multilingual UNet and the English ByT5 config for the multilingual
	    block width and mapper.

	    Raises:
	        AssertionError: if an inserted-attention checkpoint contains keys the
	            patched UNet does not have.
	    """

	    global pipeline
	    global pipeline_multilingual

	    config = parse_config('configs/glyph_sdxl_albedo.py')
	    ckpt_dir = 'checkpoints/glyph-sdxl'
	    config_multilingual = parse_config('configs/glyph_sdxl_multilingual_albedo.py')
	    ckpt_dir_multilingual = 'checkpoints/glyph-sdxl_multilingual_10-lang'

	    # The two CLIP text encoders and the VAE are shared by both pipelines.
	    text_encoder_cls_one = import_model_class_from_model_name_or_path(
	        config.pretrained_model_name_or_path, config.revision,
	    )
	    text_encoder_cls_two = import_model_class_from_model_name_or_path(
	        config.pretrained_model_name_or_path, config.revision, subfolder="text_encoder_2",
	    )
	    text_encoder_one = text_encoder_cls_one.from_pretrained(
	        config.pretrained_model_name_or_path, subfolder="text_encoder", revision=config.revision,
	        cache_dir=huggingface_cache_dir,
	    )
	    text_encoder_two = text_encoder_cls_two.from_pretrained(
	        config.pretrained_model_name_or_path, subfolder="text_encoder_2", revision=config.revision,
	        cache_dir=huggingface_cache_dir,
	    )

	    unet = UNet2DConditionModel.from_pretrained(
	        config.pretrained_model_name_or_path,
	        subfolder="unet",
	        revision=config.revision,
	        cache_dir=huggingface_cache_dir,
	    )
	    unet_multilingual = UNet2DConditionModel.from_pretrained(
	        config_multilingual.pretrained_model_name_or_path,
	        subfolder="unet",
	        revision=config_multilingual.revision,
	        cache_dir=huggingface_cache_dir,
	    )

	    vae_path = (
	        config.pretrained_model_name_or_path
	        if config.pretrained_vae_model_name_or_path is None
	        else config.pretrained_vae_model_name_or_path
	    )
	    vae = AutoencoderKL.from_pretrained(
	        vae_path, subfolder="vae" if config.pretrained_vae_model_name_or_path is None else None,
	        revision=config.revision,
	        cache_dir=huggingface_cache_dir,
	    )

	    byt5_model, byt5_tokenizer = load_byt5_and_byt5_tokenizer(
	        **config.byt5_config,
	        huggingface_cache_dir=huggingface_cache_dir,
	    )
	    byt5_model_multilingual, byt5_tokenizer_multilingual = load_byt5_and_byt5_tokenizer(
	        **config_multilingual.byt5_config,
	        huggingface_cache_dir=huggingface_cache_dir,
	    )

	    inference_dtype = torch.float32
	    if config.inference_dtype == "fp16":
	        inference_dtype = torch.float16
	    elif config.inference_dtype == "bf16":
	        inference_dtype = torch.bfloat16

	    # Patch each UNet with the glyph cross-attention blocks; the block width
	    # falls back to the matching ByT5 model's d_model when not configured.
	    _insert_glyph_cross_attn_blocks(
	        unet,
	        config.attn_block_to_modify,
	        byt5_model.config.d_model if config.byt5_mapper_config.sdxl_channels is None else config.byt5_mapper_config.sdxl_channels,
	        inference_dtype,
	    )
	    _insert_glyph_cross_attn_blocks(
	        unet_multilingual,
	        config_multilingual.attn_block_to_modify,
	        byt5_model_multilingual.config.d_model if config_multilingual.byt5_mapper_config.sdxl_channels is None else config_multilingual.byt5_mapper_config.sdxl_channels,
	        inference_dtype,
	    )

	    byt5_mapper_dict = [T5EncoderBlockByT5Mapper]
	    byt5_mapper_dict = {mapper.__name__: mapper for mapper in byt5_mapper_dict}
	    byt5_mapper = byt5_mapper_dict[config.byt5_mapper_type](
	        byt5_model.config,
	        **config.byt5_mapper_config,
	    )
	    byt5_mapper_multilingual = byt5_mapper_dict[config_multilingual.byt5_mapper_type](
	        byt5_model_multilingual.config,
	        **config_multilingual.byt5_mapper_config,
	    )

	    unet_lora_target_modules = [
	        "attn1.to_k", "attn1.to_q", "attn1.to_v", "attn1.to_out.0",
	        "attn2.to_k", "attn2.to_q", "attn2.to_v", "attn2.to_out.0",
	    ]
	    unet_lora_config = LoraConfig(
	        r=config.unet_lora_rank,
	        lora_alpha=config.unet_lora_rank,
	        init_lora_weights="gaussian",
	        target_modules=unet_lora_target_modules,
	    )
	    unet.add_adapter(unet_lora_config)
	    unet_lora_config_multilingual = LoraConfig(
	        r=config_multilingual.unet_lora_rank,
	        lora_alpha=config_multilingual.unet_lora_rank,
	        init_lora_weights="gaussian",
	        target_modules=unet_lora_target_modules,
	    )
	    unet_multilingual.add_adapter(unet_lora_config_multilingual)

	    # LoRA weights: unexpected keys are reported but do not abort start-up.
	    unet_lora_layers_para = torch.load(osp.join(ckpt_dir, UNET_CKPT_NAME), map_location='cpu')
	    incompatible_keys = set_peft_model_state_dict(unet, unet_lora_layers_para, adapter_name="default")
	    if getattr(incompatible_keys, 'unexpected_keys', []) == []:
	        print(f"loaded unet_lora_layers_para")
	    else:
	        print(f"unet_lora_layers has unexpected_keys: {getattr(incompatible_keys, 'unexpected_keys', None)}")
	    unet_lora_layers_para_multilingual = torch.load(osp.join(ckpt_dir_multilingual, UNET_CKPT_NAME), map_location='cpu')
	    incompatible_keys = set_peft_model_state_dict(unet_multilingual, unet_lora_layers_para_multilingual, adapter_name="default")
	    if getattr(incompatible_keys, 'unexpected_keys', []) == []:
	        print(f"loaded unet_lora_layers_para_multilingual")
	    else:
	        print(f"unet_lora_layers_multilingual has unexpected_keys: {getattr(incompatible_keys, 'unexpected_keys', None)}")

	    # Inserted attention weights: loaded non-strictly, but unknown keys are fatal.
	    inserted_attn_module_paras = torch.load(osp.join(ckpt_dir, INSERTED_ATTN_CKPT_NAME), map_location='cpu')
	    missing_keys, unexpected_keys = unet.load_state_dict(inserted_attn_module_paras, strict=False)
	    assert len(unexpected_keys) == 0, unexpected_keys
	    inserted_attn_module_paras_multilingual = torch.load(osp.join(ckpt_dir_multilingual, INSERTED_ATTN_CKPT_NAME), map_location='cpu')
	    missing_keys, unexpected_keys = unet_multilingual.load_state_dict(inserted_attn_module_paras_multilingual, strict=False)
	    assert len(unexpected_keys) == 0, unexpected_keys

	    byt5_mapper_para = torch.load(osp.join(ckpt_dir, BYT5_MAPPER_CKPT_NAME), map_location='cpu')
	    byt5_mapper.load_state_dict(byt5_mapper_para)
	    byt5_mapper_para_multilingual = torch.load(osp.join(ckpt_dir_multilingual, BYT5_MAPPER_CKPT_NAME), map_location='cpu')
	    byt5_mapper_multilingual.load_state_dict(byt5_mapper_para_multilingual)

	    byt5_model_para = torch.load(osp.join(ckpt_dir, BYT5_CKPT_NAME), map_location='cpu')
	    byt5_model.load_state_dict(byt5_model_para)
	    byt5_model_para_multilingual = torch.load(osp.join(ckpt_dir_multilingual, BYT5_CKPT_NAME), map_location='cpu')
	    byt5_model_multilingual.load_state_dict(byt5_model_para_multilingual)

	    pipeline = StableDiffusionGlyphXLPipeline.from_pretrained(
	        config.pretrained_model_name_or_path,
	        vae=vae,
	        text_encoder=text_encoder_one,
	        text_encoder_2=text_encoder_two,
	        byt5_text_encoder=byt5_model,
	        byt5_tokenizer=byt5_tokenizer,
	        byt5_mapper=byt5_mapper,
	        unet=unet,
	        byt5_max_length=config.byt5_max_length,
	        revision=config.revision,
	        torch_dtype=inference_dtype,
	        safety_checker=None,
	        cache_dir=huggingface_cache_dir,
	    )

	    pipeline.scheduler = DPMSolverMultistepScheduler.from_pretrained(
	        config.pretrained_model_name_or_path,
	        subfolder="scheduler",
	        use_karras_sigmas=True,
	    )

	    pipeline_multilingual = StableDiffusionGlyphXLPipeline.from_pretrained(
	        config_multilingual.pretrained_model_name_or_path,
	        vae=vae,
	        text_encoder=text_encoder_one,
	        text_encoder_2=text_encoder_two,
	        byt5_text_encoder=byt5_model_multilingual,
	        byt5_tokenizer=byt5_tokenizer_multilingual,
	        byt5_mapper=byt5_mapper_multilingual,
	        unet=unet_multilingual,
	        byt5_max_length=config_multilingual.byt5_max_length,
	        revision=config_multilingual.revision,
	        torch_dtype=inference_dtype,
	        safety_checker=None,
	        cache_dir=huggingface_cache_dir,
	    )

	    pipeline_multilingual.scheduler = DPMSolverMultistepScheduler.from_pretrained(
	        config_multilingual.pretrained_model_name_or_path,
	        subfolder="scheduler",
	        use_karras_sigmas=True,
	    )

	    # move to gpu
	    # A VAE taken from the base checkpoint stays in float32; a separately
	    # configured VAE is cast to the inference dtype.
	    if config.pretrained_vae_model_name_or_path is None:
	        vae = vae.to(device, dtype=torch.float32)
	    else:
	        vae = vae.to(device, dtype=inference_dtype)
	    text_encoder_one = text_encoder_one.to(device, dtype=inference_dtype)
	    text_encoder_two = text_encoder_two.to(device, dtype=inference_dtype)
	    byt5_mapper = byt5_mapper.to(device)
	    byt5_model = byt5_model.to(device)
	    unet = unet.to(device, dtype=inference_dtype)
	    pipeline = pipeline.to(device)

	    byt5_mapper_multilingual = byt5_mapper_multilingual.to(device)
	    byt5_model_multilingual = byt5_model_multilingual.to(device)
	    unet_multilingual = unet_multilingual.to(device, dtype=inference_dtype)
	    pipeline_multilingual = pipeline_multilingual.to(device)

	def get_pixels(
	    box_sketch_template,
	    evt: gr.SelectData
	):
	    """Record a canvas click in ``stack`` and return the redrawn sketch.

	    The first click of a pair is stored as a pending corner; the second click
	    turns that entry into ``[x, y, x2, y2]``.  The incoming image is ignored:
	    a fresh white 1024x1024 canvas is drawn from ``stack`` every time.
	    """
	    global state
	    global stack

	    clicked = evt.index

	    if state != 0:
	        # Second click: merge with the pending corner into a full box.
	        start_x, start_y = stack.pop()
	        stack.append([start_x, start_y, clicked[0], clicked[1]])
	        state = 0
	    else:
	        stack.append(clicked)
	        state = 1

	    print(stack)

	    red = (255, 0, 0)
	    radius = 4
	    canvas = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    pen = ImageDraw.Draw(canvas)

	    for number, entry in enumerate(stack, start=1):
	        label = str(number)
	        if len(entry) == 2:
	            # Pending corner: numbered red dot.
	            px, py = entry
	            pen.text((px + 2, py), label, font=font, fill=red)
	            pen.ellipse(((px - radius, py - radius), (px + radius, py + radius)), fill='red')
	        elif len(entry) == 4:
	            # Finished box: numbered outline, corners ordered for drawing.
	            left, right = sorted((entry[0], entry[2]))
	            top, bottom = sorted((entry[1], entry[3]))
	            pen.text((left + 2, top), label, font=font, fill=red)
	            pen.rectangle((left, top, right, bottom), outline=red)

	    return canvas

	def get_pixels_multilingual(
	    box_sketch_template,
	    evt: gr.SelectData
	):
	    """Record a canvas click in ``multilingual_stack`` and return the redrawn sketch.

	    The first click of a pair is stored as a pending corner; the second click
	    turns that entry into ``[x, y, x2, y2]``.  The incoming image is ignored:
	    a fresh white 1024x1024 canvas is drawn from ``multilingual_stack``.
	    """
	    global multilingual_state
	    global multilingual_stack

	    clicked = evt.index

	    if multilingual_state != 0:
	        # Second click: merge with the pending corner into a full box.
	        start_x, start_y = multilingual_stack.pop()
	        multilingual_stack.append([start_x, start_y, clicked[0], clicked[1]])
	        multilingual_state = 0
	    else:
	        multilingual_stack.append(clicked)
	        multilingual_state = 1

	    print(multilingual_stack)

	    red = (255, 0, 0)
	    radius = 4
	    canvas = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    pen = ImageDraw.Draw(canvas)

	    for number, entry in enumerate(multilingual_stack, start=1):
	        label = str(number)
	        if len(entry) == 2:
	            # Pending corner: numbered red dot.
	            px, py = entry
	            pen.text((px + 2, py), label, font=font, fill=red)
	            pen.ellipse(((px - radius, py - radius), (px + radius, py + radius)), fill='red')
	        elif len(entry) == 4:
	            # Finished box: numbered outline, corners ordered for drawing.
	            left, right = sorted((entry[0], entry[2]))
	            top, bottom = sorted((entry[1], entry[3]))
	            pen.text((left + 2, top), label, font=font, fill=red)
	            pen.rectangle((left, top, right, bottom), outline=red)

	    return canvas

	def exe_redo(
	    box_sketch_template
	):
	    """Cancel the most recent click on the canvas and return the redrawn sketch.

	    A pending corner is removed; a finished box is reduced back to its first
	    corner.  On an empty canvas this is a no-op that returns a blank sketch
	    (the original raised IndexError after already flipping ``state``, which
	    left ``state`` and ``stack`` inconsistent for the next click).
	    """
	    global state
	    global stack

	    if stack:
	        state = 1 - state
	        if len(stack[-1]) == 2:
	            stack = stack[:-1]
	        else:
	            x, y, _, _ = stack[-1]
	            stack = stack[:-1] + [[x, y]]
	    else:
	        # Nothing to cancel: an empty canvas always waits for a first corner.
	        state = 0

	    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    draw = ImageDraw.Draw(box_sketch_template)
	    text_color = (255, 0, 0)
	    r = 4

	    for i, text_position in enumerate(stack):
	        if len(text_position) == 2:
	            # Pending corner: numbered red dot.
	            x, y = text_position
	            draw.text((x+2, y), str(i+1), font=font, fill=text_color)
	            draw.ellipse(((x-r, y-r), (x+r, y+r)), fill='red')
	        elif len(text_position) == 4:
	            # Finished box: numbered outline, corners ordered for drawing.
	            x0, y0, x1, y1 = text_position
	            x0, x1 = min(x0, x1), max(x0, x1)
	            y0, y1 = min(y0, y1), max(y0, y1)
	            draw.text((x0+2, y0), str(i+1), font=font, fill=text_color)
	            draw.rectangle((x0,y0,x1,y1), outline=(255, 0, 0))

	    return box_sketch_template

	def exe_redo_multilingual(
	    box_sketch_template
	):
	    """Cancel the most recent click on the multilingual canvas and return the redrawn sketch.

	    A pending corner is removed; a finished box is reduced back to its first
	    corner.  On an empty canvas this is a no-op that returns a blank sketch
	    (the original raised IndexError after already flipping
	    ``multilingual_state``, leaving it inconsistent with the stack).
	    """
	    global multilingual_state
	    global multilingual_stack

	    if multilingual_stack:
	        multilingual_state = 1 - multilingual_state
	        if len(multilingual_stack[-1]) == 2:
	            multilingual_stack = multilingual_stack[:-1]
	        else:
	            x, y, _, _ = multilingual_stack[-1]
	            multilingual_stack = multilingual_stack[:-1] + [[x, y]]
	    else:
	        # Nothing to cancel: an empty canvas always waits for a first corner.
	        multilingual_state = 0

	    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    draw = ImageDraw.Draw(box_sketch_template)
	    text_color = (255, 0, 0)
	    r = 4

	    for i, text_position in enumerate(multilingual_stack):
	        if len(text_position) == 2:
	            # Pending corner: numbered red dot.
	            x, y = text_position
	            draw.text((x+2, y), str(i+1), font=font, fill=text_color)
	            draw.ellipse(((x-r, y-r), (x+r, y+r)), fill='red')
	        elif len(text_position) == 4:
	            # Finished box: numbered outline, corners ordered for drawing.
	            x0, y0, x1, y1 = text_position
	            x0, x1 = min(x0, x1), max(x0, x1)
	            y0, y1 = min(y0, y1), max(y0, y1)
	            draw.text((x0+2, y0), str(i+1), font=font, fill=text_color)
	            draw.rectangle((x0,y0,x1,y1), outline=(255, 0, 0))

	    return box_sketch_template

	def exe_undo(
	    box_sketch_template
	):
	    """Forget every recorded click and return a blank white 1024x1024 canvas."""
	    global state
	    global stack

	    state, stack = 0, []
	    return Image.new('RGB', (1024, 1024), (255, 255, 255))

	def exe_undo_multilingual(
	    box_sketch_template
	):
	    """Forget every multilingual click and return a blank white 1024x1024 canvas."""
	    global multilingual_state
	    global multilingual_stack

	    multilingual_state, multilingual_stack = 0, []
	    return Image.new('RGB', (1024, 1024), (255, 255, 255))

	def process_box():
	    """Reveal the settings panel and one input row per entry in ``stack``.

	    Returns a list of ``MAX_TEXT_BOX + 2`` gradio updates: one that shows the
	    panel, then one per row.  Row 0 (background prompt) is always shown,
	    followed by one row per stack entry.  The count is capped at the number
	    of rows that exist; the original raised IndexError once more than
	    ``MAX_TEXT_BOX`` boxes had been drawn.
	    """
	    total_rows = MAX_TEXT_BOX + 1
	    shown_rows = min(len(stack) + 1, total_rows)
	    visibilities = [gr.update(visible=n < shown_rows) for n in range(total_rows)]

	    return [gr.update(visible=True), *visibilities]

	def process_box_multilingual():
	    """Reveal the settings panel and one input row per entry in ``multilingual_stack``.

	    Returns a list of ``MAX_TEXT_BOX + 2`` gradio updates: one that shows the
	    panel, then one per row.  Row 0 (background prompt) is always shown,
	    followed by one row per stack entry.  The count is capped at the number
	    of rows that exist; the original raised IndexError once more than
	    ``MAX_TEXT_BOX`` boxes had been drawn.
	    """
	    total_rows = MAX_TEXT_BOX + 1
	    shown_rows = min(len(multilingual_stack) + 1, total_rows)
	    visibilities = [gr.update(visible=n < shown_rows) for n in range(total_rows)]

	    return [gr.update(visible=True), *visibilities]

	@torch.inference_mode()
	@spaces.GPU(enable_queue=True, duration=120)
	def generate_image(bg_prompt, bg_class, bg_tags, seed, cfg, *conditions):
	    """Render one image with ``pipeline`` from the drawn boxes and form inputs.

	    Args:
	        bg_prompt: background design prompt; must be non-empty.
	        bg_class: optional design type, prepended to the prompt.
	        bg_tags: optional tags, appended to the prompt.
	        seed: integer seed; ``-1`` draws a fresh random seed.
	        cfg: guidance scale passed to the pipeline.
	        *conditions: ``MAX_TEXT_BOX`` text prompts, then ``MAX_TEXT_BOX``
	            colour names, then ``MAX_TEXT_BOX`` font names.

	    Returns:
	        The first image produced by the pipeline.

	    Raises:
	        gr.Error: on an empty background prompt or an incomplete text box.
	    """

	    stack_cp = deepcopy(stack)
	    print(f"conditions: {conditions}")

	    # 1. parse input
	    # A trailing 2-element entry is a box whose second corner was never
	    # clicked, so it is skipped.  An empty stack simply means no boxes
	    # (the original raised IndexError on stack_cp[-1]).
	    num_boxes = len(stack_cp)
	    if stack_cp and len(stack_cp[-1]) != 4:
	        num_boxes -= 1
	    # Only MAX_TEXT_BOX input rows exist; extra boxes have no form values.
	    num_boxes = min(num_boxes, MAX_TEXT_BOX)

	    prompts = [conditions[i] for i in range(num_boxes)]
	    colors = [conditions[i + MAX_TEXT_BOX] for i in range(num_boxes)]
	    font_type = [conditions[i + MAX_TEXT_BOX * 2] for i in range(num_boxes)]

	    # 2. input check
	    bboxes = []
	    styles = []
	    if bg_prompt == "" or bg_prompt is None:
	        raise gr.Error("Empty background prompt!")
	    for i, (prompt, color, style) in enumerate(zip(prompts, colors, font_type)):
	        if prompt == "" or prompt is None:
	            raise gr.Error(f"Invalid prompt for text box {i + 1} !")
	        if color is None:
	            raise gr.Error(f"Invalid color for text box {i + 1} !")
	        if style is None:
	            raise gr.Error(f"Invalid style for text box {i + 1} !")
	        # Order the corners so a box clicked right-to-left or bottom-to-top
	        # still yields a top-left origin with positive width and height,
	        # matching the rectangle shown on the canvas.
	        x0, y0, x1, y1 = stack_cp[i]
	        left, right = min(x0, x1), max(x0, x1)
	        top, bottom = min(y0, y1), max(y0, y1)
	        bboxes.append(
	            [
	                left / 1024,
	                top / 1024,
	                (right - left) / 1024,
	                (bottom - top) / 1024,
	            ]
	        )
	        styles.append(
	            {
	                'color': webcolors.name_to_hex(color),
	                'font-family': style,
	            }
	        )

	    # 3. format input
	    if bg_class != "" and bg_class is not None:
	        bg_prompt = bg_class + ". " + bg_prompt
	    if bg_tags != "" and bg_tags is not None:
	        bg_prompt += " Tags: " + bg_tags
	    text_prompt = prompt_format.format_prompt(prompts, styles)

	    print(f"bg_prompt: {bg_prompt}")
	    print(f"text_prompt: {text_prompt}")

	    # 4. inference
	    generator = torch.Generator(device=device)
	    if seed == -1:
	        # A new Generator starts from a fixed default seed, so it has to be
	        # reseeded explicitly to get a different image on each run.
	        generator.seed()
	    else:
	        generator.manual_seed(int(seed))
	    with torch.cuda.amp.autocast():
	        image = pipeline(
	            prompt=bg_prompt,
	            text_prompt=text_prompt,
	            texts=prompts,
	            bboxes=bboxes,
	            num_inference_steps=50,
	            guidance_scale=cfg,
	            generator=generator,
	            text_attn_mask=None,
	        ).images[0]

	    flush()

	    return image

	@torch.inference_mode()
	@spaces.GPU(enable_queue=True, duration=120)
	def generate_image_multilingual(bg_prompt, bg_class, bg_tags, seed, cfg, *conditions):
	    """Render one image with ``pipeline_multilingual`` from the drawn boxes and form inputs.

	    Args:
	        bg_prompt: background design prompt; must be non-empty.
	        bg_class: optional design type, prepended to the prompt.
	        bg_tags: optional tags, appended to the prompt.
	        seed: integer seed; ``-1`` draws a fresh random seed.
	        cfg: guidance scale passed to the pipeline.
	        *conditions: ``MAX_TEXT_BOX`` text prompts, then ``MAX_TEXT_BOX``
	            colour names, then ``MAX_TEXT_BOX`` font selections of the form
	            ``"<Language>: <font name>"``.

	    Returns:
	        The first image produced by the pipeline.

	    Raises:
	        gr.Error: on an empty background prompt, an incomplete text box, or a
	            font selection that is missing or not ``"<Language>: <font>"``.
	    """

	    stack_cp = deepcopy(multilingual_stack)
	    print(f"conditions: {conditions}")

	    # 1. parse input
	    # A trailing 2-element entry is a box whose second corner was never
	    # clicked, so it is skipped.  An empty stack simply means no boxes
	    # (the original raised IndexError on stack_cp[-1]).
	    num_boxes = len(stack_cp)
	    if stack_cp and len(stack_cp[-1]) != 4:
	        num_boxes -= 1
	    # Only MAX_TEXT_BOX input rows exist; extra boxes have no form values.
	    num_boxes = min(num_boxes, MAX_TEXT_BOX)

	    for i in range(num_boxes):
	        if conditions[i + MAX_TEXT_BOX * 2] is None:
	            raise gr.Error(f"Invalid conditions for box {i + 1} !")

	    prompts = []
	    colors = []
	    font_type = []
	    for i in range(num_boxes):
	        prompts.append(conditions[i])
	        colors.append(conditions[i + MAX_TEXT_BOX])
	        selection = conditions[i + MAX_TEXT_BOX * 2]
	        parts = selection.split(":")
	        lang = parts[0].strip()
	        # Report a malformed or unknown selection to the user instead of
	        # failing with a bare IndexError / KeyError.
	        if len(parts) < 2 or lang not in multilingual_reverse_code_dict:
	            raise gr.Error(f"Invalid conditions for box {i + 1} !")
	        font_name = parts[1].strip()
	        print(selection, " ", lang, " ", font_name)
	        font_type.append(f'{multilingual_reverse_code_dict[lang]}-{font_name}')

	    # 2. input check
	    bboxes = []
	    styles = []
	    if bg_prompt == "" or bg_prompt is None:
	        raise gr.Error("Empty background prompt!")
	    for i, (prompt, color, style) in enumerate(zip(prompts, colors, font_type)):
	        if prompt == "" or prompt is None:
	            raise gr.Error(f"Invalid prompt for text box {i + 1} !")
	        if color is None:
	            raise gr.Error(f"Invalid color for text box {i + 1} !")
	        if style is None:
	            raise gr.Error(f"Invalid style for text box {i + 1} !")

	        # Order the corners so a box clicked right-to-left or bottom-to-top
	        # still yields a top-left origin with positive width and height,
	        # matching the rectangle shown on the canvas.
	        x0, y0, x1, y1 = stack_cp[i]
	        left, right = min(x0, x1), max(x0, x1)
	        top, bottom = min(y0, y1), max(y0, y1)
	        bboxes.append(
	            [
	                left / 1024,
	                top / 1024,
	                (right - left) / 1024,
	                (bottom - top) / 1024,
	            ]
	        )
	        styles.append(
	            {
	                'color': webcolors.name_to_hex(color),
	                'font-family': style,
	            }
	        )

	    # 3. format input
	    if bg_class != "" and bg_class is not None:
	        bg_prompt = bg_class + ". " + bg_prompt
	    if bg_tags != "" and bg_tags is not None:
	        bg_prompt += " Tags: " + bg_tags
	    text_prompt = multilingual_prompt_format.format_prompt(prompts, styles)

	    print(f"bg_prompt: {bg_prompt}")
	    print(f"text_prompt: {text_prompt}")

	    # 4. inference
	    generator = torch.Generator(device=device)
	    if seed == -1:
	        # A new Generator starts from a fixed default seed, so it has to be
	        # reseeded explicitly to get a different image on each run.
	        generator.seed()
	    else:
	        generator.manual_seed(int(seed))
	    with torch.cuda.amp.autocast():
	        image = pipeline_multilingual(
	            prompt=bg_prompt,
	            text_prompt=text_prompt,
	            texts=prompts,
	            bboxes=bboxes,
	            num_inference_steps=50,
	            guidance_scale=cfg,
	            generator=generator,
	            text_attn_mask=None,
	        ).images[0]

	    flush()

	    return image

	def process_example(prev_img, bg_prompt, bg_class, bg_tags, color_str, style_str, text_str, box_str, seed, cfg):
	    """Load an example row into ``stack`` and return the matching UI updates.

	    Args:
	        color_str: comma-separated colour names, one per box.
	        style_str: semicolon-separated font names, one per box.
	        text_str: box texts joined by ``"**********"``.
	        box_str: semicolon-separated boxes, each ``[x, y, w, h]`` as
	            fractions of the 1024-pixel canvas.
	        seed: passed through unchanged.
	        prev_img, bg_prompt, bg_class, bg_tags, cfg: accepted because they are
	            example inputs, but not read here.

	    Returns:
	        ``[panel_update, sketch_image, seed, visibilities, colors, styles,
	        prompts]`` where the last three lists are padded to ``MAX_TEXT_BOX``.
	    """

	    global stack, state

	    colors = [color.strip() for color in color_str.split(",")]
	    styles = [style.strip() for style in style_str.split(";")]
	    boxes = box_str.split(";")
	    prompts = text_str.split("**********")
	    colors += [None] * (MAX_TEXT_BOX - len(colors))
	    styles += [None] * (MAX_TEXT_BOX - len(styles))
	    prompts += [""] * (MAX_TEXT_BOX - len(prompts))

	    state = 0
	    stack = []
	    print(boxes)
	    for box in boxes:
	        print(box)
	        box = box.strip()[1:-1]  # drop the surrounding brackets
	        print(box)
	        box = box.split(",")
	        print(box)
	        # Parse with float() rather than eval(): the fields are plain numbers
	        # and eval would execute arbitrary expressions found in the string.
	        x = float(box[0].strip()) * 1024
	        y = float(box[1].strip()) * 1024
	        w = float(box[2].strip()) * 1024
	        h = float(box[3].strip()) * 1024
	        stack.append([int(x), int(y), int(x + w + 0.5), int(y + h + 0.5)])

	    visibilities = []
	    for _ in range(MAX_TEXT_BOX + 1):
	        visibilities.append(gr.update(visible=False))
	    for n in range(len(stack) + 1):
	        visibilities[n] = gr.update(visible=True)

	    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    draw = ImageDraw.Draw(box_sketch_template)
	    text_color = (255, 0, 0)
	    r = 4

	    for i, text_position in enumerate(stack):
	        if len(text_position) == 2:
	            x, y = text_position
	            draw.text((x+2, y), str(i + 1), font=font, fill=text_color)
	            draw.ellipse(((x-r, y-r), (x+r, y+r)), fill='red')
	        elif len(text_position) == 4:
	            x0, y0, x1, y1 = text_position
	            x0, x1 = min(x0, x1), max(x0, x1)
	            y0, y1 = min(y0, y1), max(y0, y1)
	            draw.text((x0+2, y0), str(i + 1), font=font, fill=text_color)
	            draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0))

	    # NOTE(review): the last four items are returned as nested lists; gradio
	    # normally expects one flat value per output component, so these may have
	    # lost a `*` unpacking -- confirm together with the `outputs=` list of the
	    # gr.Examples call that uses this function.
	    return [
	        gr.update(visible=True), box_sketch_template, seed, visibilities, colors, styles, prompts,
	    ]

	def process_example_multilingual(prev_img, bg_prompt, bg_class, bg_tags, color_str, style_str, text_str, box_str, seed, cfg):
	    """Load an example row into ``multilingual_stack`` and return the matching UI updates.

	    Args:
	        color_str: comma-separated colour names, one per box.
	        style_str: semicolon-separated font selections, one per box.
	        text_str: box texts joined by ``"**********"``.
	        box_str: semicolon-separated boxes, each ``[x, y, w, h]`` as
	            fractions of the 1024-pixel canvas.
	        seed: passed through unchanged.
	        prev_img, bg_prompt, bg_class, bg_tags, cfg: accepted because they are
	            example inputs, but not read here.

	    Returns:
	        ``[panel_update, sketch_image, seed, visibilities, colors, styles,
	        prompts]`` where the last three lists are padded to ``MAX_TEXT_BOX``.
	    """

	    global multilingual_stack, multilingual_state

	    colors = color_str.split(",")
	    styles = style_str.split(";")
	    print(styles)
	    boxes = box_str.split(";")
	    prompts = text_str.split("**********")
	    colors = [color.strip() for color in colors]
	    styles = [style.strip() for style in styles]
	    colors += [None] * (MAX_TEXT_BOX - len(colors))
	    styles += [None] * (MAX_TEXT_BOX - len(styles))
	    prompts += [""] * (MAX_TEXT_BOX - len(prompts))

	    multilingual_state = 0
	    multilingual_stack = []
	    print(boxes)
	    for box in boxes:
	        print(box)
	        box = box.strip()[1:-1]  # drop the surrounding brackets
	        print(box)
	        box = box.split(",")
	        print(box)
	        # Parse with float() rather than eval(): the fields are plain numbers
	        # and eval would execute arbitrary expressions found in the string.
	        x = float(box[0].strip()) * 1024
	        y = float(box[1].strip()) * 1024
	        w = float(box[2].strip()) * 1024
	        h = float(box[3].strip()) * 1024
	        multilingual_stack.append([int(x), int(y), int(x + w + 0.5), int(y + h + 0.5)])

	    visibilities = []
	    for _ in range(MAX_TEXT_BOX + 1):
	        visibilities.append(gr.update(visible=False))
	    for n in range(len(multilingual_stack) + 1):
	        visibilities[n] = gr.update(visible=True)

	    box_sketch_template = Image.new('RGB', (1024, 1024), (255, 255, 255))
	    draw = ImageDraw.Draw(box_sketch_template)
	    text_color = (255, 0, 0)
	    r = 4

	    for i, text_position in enumerate(multilingual_stack):
	        if len(text_position) == 2:
	            x, y = text_position
	            draw.text((x+2, y), str(i + 1), font=font, fill=text_color)
	            draw.ellipse(((x-r, y-r), (x+r, y+r)), fill='red')
	        elif len(text_position) == 4:
	            x0, y0, x1, y1 = text_position
	            x0, x1 = min(x0, x1), max(x0, x1)
	            y0, y1 = min(y0, y1), max(y0, y1)
	            draw.text((x0+2, y0), str(i + 1), font=font, fill=text_color)
	            draw.rectangle((x0, y0, x1, y1), outline=(255, 0, 0))

	    # NOTE(review): the last four items are returned as nested lists; gradio
	    # normally expects one flat value per output component, so these may have
	    # lost a `*` unpacking -- confirm together with the `outputs=` list of the
	    # gr.Examples call that uses this function.
	    return [
	        gr.update(visible=True), box_sketch_template, seed, visibilities, colors, styles, prompts,
	    ]

	def build_input_block(color_idx_list, font_idx_list, examples):
	    """Create the sketch canvas, per-box inputs, output image and examples.

	    Components are wired to the non-multilingual callbacks (get_pixels,
	    exe_redo, exe_undo, process_box, generate_image, process_example).

	    Args:
	        color_idx_list: choices offered by each colour drop-down.
	        font_idx_list: choices offered by each font drop-down.
	        examples: rows handed to ``gr.Examples``.
	    """

	    with gr.Row():
	        with gr.Column(elem_id="main-image"):
	            # Blank white canvas; clicks on it are handled by get_pixels.
	            box_sketch_template = gr.Image(
	                value=Image.new('RGB', (1024, 1024), (255, 255, 255)),
	                sources=[],
	                interactive=False,
	            )

	            box_sketch_template.select(get_pixels, [box_sketch_template], [box_sketch_template])

	            with gr.Row():
	                redo = gr.Button(value='Redo - Cancel last point')
	                undo = gr.Button(value='Undo - Clear the canvas')
	            redo.click(exe_redo, [box_sketch_template], [box_sketch_template])
	            undo.click(exe_undo, [box_sketch_template], [box_sketch_template])

	            button_layout = gr.Button("(1) I've finished my layout!", elem_id="main_button", interactive=True)

	            prompts = []
	            colors = []
	            styles = []
	            # Row 0 holds the background fields; rows 1..MAX_TEXT_BOX hold one
	            # text box each and start hidden.
	            color_row = [None] * (MAX_TEXT_BOX + 1)
	            with gr.Column(visible=False) as post_box:
	                for n in range(MAX_TEXT_BOX + 1):
	                    if n == 0 :
	                        with gr.Row(visible=True) as color_row[n]:
	                            bg_prompt = gr.Textbox(label="Design prompt of background", value="")
	                            bg_class = gr.Textbox(label="Design type of background (optional)", value="")
	                            # NOTE(review): this label repeats "Design type" although the
	                            # field is the tags input -- confirm the intended wording.
	                            bg_tags = gr.Textbox(label="Design type of the background (optional)", value="")
	                    else:
	                        with gr.Row(visible=False) as color_row[n]:
	                            prompts.append(gr.Textbox(label="Prompt for box "+str(n)))
	                            colors.append(gr.Dropdown(
	                                label="Color for box "+str(n),
	                                choices=color_idx_list,
	                            ))
	                            styles.append(gr.Dropdown(
	                                label="Font type for box "+str(n),
	                                choices=font_idx_list,
	                            ))

	                # -1 is the slider minimum; generate_image treats it specially.
	                seed_ = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, value=-1, step=1)
	                cfg_ = gr.Slider(label="CFG Scale", minimum=1, maximum=10, value=5)
	                button_generate = gr.Button("(2) I've finished my texts, colors and styles, generate!", elem_id="main_button", interactive=True, variant='primary')

	            button_layout.click(process_box, inputs=[], outputs=[post_box, *color_row])

	        with gr.Column():
	            output_image = gr.Image(label="Output Image", interactive=False)

	    # Inputs are ordered prompts, then colours, then fonts -- the layout that
	    # generate_image indexes into via MAX_TEXT_BOX offsets.
	    button_generate.click(generate_image, inputs=[bg_prompt, bg_class, bg_tags, seed_, cfg_, *(prompts + colors + styles)], outputs=[output_image], queue=True)

	    with gr.Row():
	        # examples
	        # Hidden fields that carry the per-example strings into process_example.
	        color_str = gr.Textbox(label="Color list", value="", visible=False)
	        style_str = gr.Textbox(label="Font type list", value="", visible=False)
	        box_str = gr.Textbox(label="Bbox list", value="", visible=False)
	        text_str = gr.Textbox(label="Text list", value="", visible=False)
	        prev_img = gr.Image(label="Preview", visible = False)

	    # NOTE(review): `outputs` contains nested lists (color_row, colors, styles,
	    # prompts); gradio normally expects a flat list of components, so these
	    # may have lost a `*` unpacking -- confirm against process_example's return.
	    gr.Examples(
	        examples=examples,
	        inputs=[
	            prev_img,
	            bg_prompt,
	            bg_class,
	            bg_tags,
	            color_str,
	            style_str,
	            text_str,
	            box_str,
	            seed_,
	            cfg_
	        ],
	        outputs=[post_box, box_sketch_template, seed_, color_row, colors, styles, prompts],
	        fn=process_example,
	        cache_examples=False,
	        run_on_click=True,
	        label='Examples',
	    )

	def build_input_block_multilingual(color_idx_list, font_idx_list, examples):
	    """Create the multilingual sketch canvas, per-box inputs, output image and examples.

	    Components are wired to the multilingual callbacks
	    (get_pixels_multilingual, exe_redo_multilingual, exe_undo_multilingual,
	    process_box_multilingual, generate_image_multilingual,
	    process_example_multilingual).

	    Args:
	        color_idx_list: choices offered by each colour drop-down.
	        font_idx_list: choices offered by each font drop-down.
	        examples: rows handed to ``gr.Examples``.
	    """

	    with gr.Row():
	        with gr.Column(elem_id="main-image"):
	            # Blank white canvas; clicks on it are handled by get_pixels_multilingual.
	            box_sketch_template = gr.Image(
	                value=Image.new('RGB', (1024, 1024), (255, 255, 255)),
	                sources=[],
	                interactive=False,
	            )

	            box_sketch_template.select(get_pixels_multilingual, [box_sketch_template], [box_sketch_template])

	            with gr.Row():
	                redo = gr.Button(value='Redo - Cancel last point')
	                undo = gr.Button(value='Undo - Clear the canvas')
	            redo.click(exe_redo_multilingual, [box_sketch_template], [box_sketch_template])
	            undo.click(exe_undo_multilingual, [box_sketch_template], [box_sketch_template])

	            button_layout = gr.Button("(1) I've finished my layout!", elem_id="main_button", interactive=True)

	            prompts = []
	            colors = []
	            styles = []
	            # Row 0 holds the background fields; rows 1..MAX_TEXT_BOX hold one
	            # text box each and start hidden.
	            color_row = [None] * (MAX_TEXT_BOX + 1)
	            with gr.Column(visible=False) as post_box:
	                for n in range(MAX_TEXT_BOX + 1):
	                    if n == 0 :
	                        with gr.Row(visible=True) as color_row[n]:
	                            bg_prompt = gr.Textbox(label="Design prompt of background", value="")
	                            bg_class = gr.Textbox(label="Design type of background (optional)", value="")
	                            # NOTE(review): this label repeats "Design type" although the
	                            # field is the tags input -- confirm the intended wording.
	                            bg_tags = gr.Textbox(label="Design type of the background (optional)", value="")
	                    else:
	                        with gr.Row(visible=False) as color_row[n]:
	                            prompts.append(gr.Textbox(label="Prompt for box "+str(n)))
	                            colors.append(gr.Dropdown(
	                                label="Color for box "+str(n),
	                                choices=color_idx_list,
	                            ))
	                            styles.append(gr.Dropdown(
	                                label="Font type for box "+str(n),
	                                choices=font_idx_list,
	                            ))

	                # -1 is the slider minimum; generate_image_multilingual treats it specially.
	                seed_ = gr.Slider(label="Seed", minimum=-1, maximum=2147483647, value=-1, step=1)
	                cfg_ = gr.Slider(label="CFG Scale", minimum=1, maximum=10, value=5)
	                button_generate = gr.Button("(2) I've finished my texts, colors and styles, generate!", elem_id="main_button", interactive=True, variant='primary')

	            button_layout.click(process_box_multilingual, inputs=[], outputs=[post_box, *color_row])

	        with gr.Column():
	            output_image = gr.Image(label="Output Image", interactive=False)

	    # Inputs are ordered prompts, then colours, then fonts -- the layout that
	    # generate_image_multilingual indexes into via MAX_TEXT_BOX offsets.
	    button_generate.click(generate_image_multilingual, inputs=[bg_prompt, bg_class, bg_tags, seed_, cfg_, *(prompts + colors + styles)], outputs=[output_image], queue=True)

	    with gr.Row():
	        # examples
	        # Hidden fields that carry the per-example strings into
	        # process_example_multilingual.
	        color_str = gr.Textbox(label="Color list", value="", visible=False)
	        style_str = gr.Textbox(label="Font type list", value="", visible=False)
	        box_str = gr.Textbox(label="Bbox list", value="", visible=False)
	        text_str = gr.Textbox(label="Text list", value="", visible=False)
	        prev_img = gr.Image(label="Preview", visible = False)

	    # NOTE(review): `outputs` contains nested lists (color_row, colors, styles,
	    # prompts); gradio normally expects a flat list of components, so these
	    # may have lost a `*` unpacking -- confirm against
	    # process_example_multilingual's return.
	    gr.Examples(
	        examples=examples,
	        inputs=[
	            prev_img,
	            bg_prompt,
	            bg_class,
	            bg_tags,
	            color_str,
	            style_str,
	            text_str,
	            box_str,
	            seed_,
	            cfg_
	        ],
	        outputs=[post_box, box_sketch_template, seed_, color_row, colors, styles, prompts],
	        fn=process_example_multilingual,
	        cache_examples=False,
	        run_on_click=True,
	        label='Examples',
	    )

	def main():
	    """Build and launch the Gradio demo with a multilingual tab and an English tab.

	    Steps, in order:

	    1. Call ``init_pipeline()``.
	    2. Read the dropdown choices from ``assets/color_idx.json`` and
	       ``assets/font_idx_512.json`` (iterating the loaded JSON object, i.e.
	       its keys when the top level is an object).
	    3. For every language key in ``multilingual_font_dict``, read that
	       language's font list and build ``"<Language>: <font name>"`` choices.
	    4. Define the page header HTML, the custom CSS and the example rows.
	    5. Assemble the ``gr.Blocks`` UI, enable the queue and launch the app.

	    Takes no arguments and returns ``None``. File and JSON errors raised
	    while reading the asset files are not caught here.
	    """
	    init_pipeline()

	    # load configs
	    with open('assets/color_idx.json', 'r', encoding='utf-8') as f:
	        color_idx_list = list(json.load(f))
	    with open('assets/font_idx_512.json', 'r', encoding='utf-8') as f:
	        eng_font_idx_list = list(json.load(f))

	    multi_font_idx_list = []
	    for lang in multilingual_font_dict:
	        # Same directory the module-level font metadata is read from.
	        with open(osp.join(multilingual_meta_path, f'{lang}.json'), 'r', encoding='utf-8') as f:
	            lang_font_list = json.load(f)
	        for font_entry in lang_font_list:
	            # Drop the first three characters of the stored font identifier
	            # (presumably a language-code prefix such as "cn-"; confirm
	            # against the files under assets/multi_fonts).
	            font_name = font_entry[0][3:]
	            multi_font_idx_list.append(f"{multilingual_code_dict[lang]}: {font_name}")

	    # Plain (non f-) string: there is nothing to interpolate, so CSS braces
	    # can be written literally.
	    html = """<h1>Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering</h1>
	<h2><a href='https://glyph-byt5.github.io/'>Glyph-ByT5 Project Page</a> |<a href='https://glyph-byt5-v2.github.io/'>Glyph-ByT5-v2 Project Page</a> | <a href='https://arxiv.org/abs/2403.09622'>Glyph-ByT5 arXiv Paper</a> |<a href='https://arxiv.org/abs/2406.10208'>Glyph-ByT5-v2 arXiv Paper</a> | <a href='https://github.com/AIGText/Glyph-ByT5'>Github</a></h2>
	<p><b>We present a basic version of Glyph-SDXL, and a multilingual version Glyph-SDXL-v2 supporting up to 10 languages: English, Chinese, French, German, Spanish, Portuguese, Italian, Russian, Japanese and Korean.</b></p>
	<p><b>Note: due to limited capacity, we support 5000 chars in Chinese, 1148 chars in Japanese and 617 in Korean. Certain uncommon characters might not be supported for these three languages.</b></p>
	<p><b>Models presented in this demo are all based on albedo-xl!</b></p>
	<p><b>Try some examples at the bottom of the page to get started!</b></p>
	<p><b>Quick Guide:</b></p>
	<p>1. <b>Select bounding boxes</b> on the canvas on the left <b>by clicking twice</b>. </p>
	<p>2. Click "Redo" if you want to cancel last point, "Undo" for clearing the canvas. </p>
	<p>3. <b>Click "I've finished my layout!"</b> to start choosing specific prompts, colors and font-types. </p>
	<p>4. Enter a <b>design prompt</b> for the background image. Optionally, you can choose to specify the design categories and tags (separated by a comma). </p>
	<p>5. For each text box, <b>enter the text prompts in the text box</b> on the left, and <b>select colors and font-types from the drop boxes</b> on the right. </p>
	<p>6. <b>Click on "I've finished my texts, colors and styles, generate!"</b> to start generating!. </p>
	<style>.btn {flex-grow: unset !important;} </style>
	"""

	    css = '''
	#color-bg{display:flex;justify-content: center;align-items: center;}
	.color-bg-item{width: 100%; height: 32px}
	#main_button{width:100%}
	'''

	    # Each example row is: preview image, background prompt, design type,
	    # tags, colors (", "-separated), fonts ("; "-separated), texts, boxes
	    # ("; "-separated), seed, CFG scale. This matches the gr.Examples inputs
	    # of the multilingual builder; the English builder is assumed to use the
	    # same layout.
	    eng_examples = [
	        [
	            'examples/easter.webp',
	            'The image features a small bunny rabbit sitting in a basket filled with various flowers. The basket is placed on a yellow background, creating a vibrant and cheerful scene. The flowers surrounding the rabbit come in different sizes and colors, adding to the overall visual appeal of the image. The rabbit appears to be the main focus of the scene, and its presence among the flowers creates a sense of harmony and balance.',
	            'Facebook Post',
	            'green, yellow, minimalist, easter day, happy easter day, easter, happy easter, decoration, happy, egg, spring, selebration, poster, illustration, greeting, season, design, colorful, cute, template',
	            'darkolivegreen, darkolivegreen, darkolivegreen',
	            'Gagalin-Regular; Gagalin-Regular; Brusher-Regular',
	            'MAY ALLYOUR PRAYERS BE ANSWERED********HAVE A HAPPY********Easter Day',
	            '[0.08267477203647416, 0.5355623100303951, 0.42857142857142855, 0.07477203647416414]; [0.08389057750759879, 0.1951367781155015, 0.38054711246200607, 0.03768996960486322]; [0.07537993920972644, 0.2601823708206687, 0.49544072948328266, 0.14650455927051673]',
	            1,
	            5
	        ],
	        [
	            'examples/shower.webp',
	            'The image features a large gray elephant sitting in a field of flowers, holding a smaller elephant in its arms. The scene is quite serene and picturesque, with the two elephants being the main focus of the image. The field is filled with various flowers, creating a beautiful and vibrant backdrop for the elephants.',
	            'Cards and invitations',
	            'Light green, orange, Illustration, watercolor, playful, Baby shower invitation, baby boy shower invitation, baby boy, welcoming baby boy, koala baby shower invitation, baby shower invitation for baby shower, baby boy invitation, background, playful baby shower card, baby shower, card, newborn, born, Baby Shirt Baby Shower Invitation',
	            'peru, olive, olivedrab, peru, peru, peru',
	            'LilitaOne; Sensei-Medium; Sensei-Medium; LilitaOne; LilitaOne; LilitaOne',
	            "RSVP to +123-456-7890********Olivia Wilson******Baby Shower******Please Join Us For a******In Honoring********23 November, 2021 | 03:00 PM Fauget Hotels",
	            '[0.07112462006079028, 0.6462006079027356, 0.3373860182370821, 0.026747720364741642]; [0.07051671732522796, 0.38662613981762917, 0.37264437689969604, 0.059574468085106386]; [0.07234042553191489, 0.15623100303951368, 0.6547112462006079, 0.12401215805471125]; [0.0662613981762918, 0.06747720364741641, 0.3981762917933131, 0.035866261398176294]; [0.07051671732522796, 0.31550151975683893, 0.22006079027355624, 0.03951367781155015]; [0.06990881458966565, 0.48328267477203646, 0.39878419452887537, 0.1094224924012158]',
	            870745856,
	            5
	        ],
	        [
	            'examples/new_year.webp',
	            'The image features a white background with a variety of colorful flowers and decorations. There are several pink flowers scattered throughout the scene, with some positioned closer to the top and others near the bottom. A blue flower can also be seen in the middle of the image. The overall composition creates a visually appealing and vibrant display.',
	            'Instagram Posts',
	            'grey, navy, purple, pink, teal, colorful, illustration, happy, celebration, post, party, year, new, event, celebrate, happy new year, new year, countdown, sparkle, firework',
	            'purple, midnightblue, black, black',
	            'Caveat-Regular; Gagalin-Regular; Quicksand-Light; Quicksand-Light',
	            'Happy New Year********2024******All THE BEST********A fresh start to start a change for the better.',
	            '[0.2936170212765957, 0.2887537993920973, 0.40303951367781155, 0.07173252279635259]; [0.24984802431610942, 0.3951367781155015, 0.46200607902735563, 0.17203647416413373]; [0.3951367781155015, 0.1094224924012158, 0.2109422492401216, 0.02796352583586626]; [0.20911854103343466, 0.6127659574468085, 0.5586626139817629, 0.08085106382978724]',
	            763905874,
	            5
	        ],
	        [
	            'examples/pancake.webp',
	            'The image features a stack of pancakes with syrup and strawberries on top. The pancakes are arranged in a visually appealing manner, with some pancakes placed on top of each other. The syrup is drizzled generously over the pancakes, and the strawberries are scattered around, adding a touch of color and freshness to the scene. The overall presentation of the pancakes is appetizing and inviting.',
	            'Instagram Posts',
	            'brown, peach, grey, modern, minimalist, simple, colorful, illustration, Instagram post, instagram, post, national pancake day, international pancake day, happy pancake day, pancake day, pancake, sweet, cake, discount, sale',
	            'dimgray, white, darkolivegreen',
	            'MoreSugarRegular; Chewy-Regular; Chewy-Regular',
	            'Get 75% Discount for your first order********Order Now********National Pancake Day',
	            '[0.043161094224924014, 0.5963525835866261, 0.2936170212765957, 0.08389057750759879]; [0.12279635258358662, 0.79209726443769, 0.26382978723404255, 0.05167173252279635]; [0.044984802431610946, 0.09787234042553192, 0.4413373860182371, 0.4158054711246201]',
	            1,
	            5
	        ]
	    ]

	    multi_examples = [
	        [
	            'examples/cake.webp',
	            'The image features a delicious-looking chocolate cake with chocolate frosting. The cake is placed on a white plate, which is set on a blue tablecloth. The cake appears to be a celebration, possibly a birthday or anniversary, given the presence of a candle. The overall presentation of the cake is elegant and inviting.',
	            '',
	            '',
	            'bisque, bisque, bisque',
	            'Chinese: HelloFont-ID-DianHei-EEJ; Chinese: Hellofont-ID-QingHuaXingKai; Chinese: HelloFont-ID-LingLiTi',
	            '生日快乐********只愿你被这世界温柔相待********妹妹',
	            '[0.601823708206687, 0.5556231003039513, 0.35501519756838906, 0.08693009118541034]; [0.6261398176291794, 0.6723404255319149, 0.3252279635258359, 0.1270516717325228]; [0.6553191489361702, 0.4401215805471125, 0.23829787234042554, 0.11063829787234042]',
	            7,
	            5
	        ],
	        [
	            'examples/xiaoman.webp',
	            'The image portrays a young girl sitting on a large green leaf. The leaf is part of a plant with other green leaves. The girl is wearing a yellow dress and a straw hat. She is holding a small yellow flower in her hand. The background of the image is a light blue sky with a few clouds. The overall style of the image is a colorful, cartoon-like illustration.',
	            '',
	            '',
	            'darkolivegreen, goldenrod, white, darkolivegreen, darkolivegreen',
	            'Chinese: HYQiHei-AZEJ; English: TAN MERINGUE; Chinese: SourceHanSansSC-ExtraLight; Chinese: AlibabaPuHuiTi-Bold; English: SairaCondensed-Regular',
	            '小满是二十四节气之一，夏季的第二个节气。该节气是指夏熟作物的籽粒开始灌浆饱满，但还未成熟，只是小满，还未大满。********2022.5.21******饱满的灵魂无畏的生长二十四节气之一******今日小满********Grain Buds',
	            '[0.09969604863221884, 0.4370820668693009, 0.31124620060790276, 0.2072948328267477]; [ 0.10455927051671733, 0.09908814589665653, 0.22127659574468084, 0.034650455927051675]; [ 0.09969604863221884, 0.9398176291793313, 0.7993920972644377, 0.026747720364741642]; [ 0.09787234042553192, 0.17142857142857143, 0.4231003039513678, 0.10577507598784194]; [ 0.10091185410334347, 0.3100303951367781, 0.2772036474164134, 0.053495440729483285]',
	            0,
	            5
	        ],
	        [
	            'examples/ski.webp',
	            'The image depicts a winter sports scene. In the foreground, there is a person on a snowboard. The snowboarder is wearing a white jacket, black pants, and a black helmet with goggles. The snowboarder is in the process of performing a trick, with one hand extended and the other hand holding the snowboard.\nThe background of the image shows a snowy landscape with trees and a clear blue sky. The overall style of the image is a digital illustration with a cartoonish and colorful aesthetic.',
	            '',
	            '',
	            'white, white',
	            'Chinese: CanvaEndeavorBlackSC; Chinese: SourceHanSansSC-Light',
	            '总要来一趟哈尔滨滑雪吧**********冰雪大世界',
	            '[0.19696048632218846, 0.23829787234042554, 0.6054711246200608, 0.05592705167173252]; [0.19756838905775076, 0.09422492401215805, 0.6042553191489362, 0.1209726443768997]',
	            1,
	            5
	        ],
	        [
	            'examples/song.webp',
	            'The image features a cartoon of a fox character. The fox is standing on a stage with a microphone in front of it. The fox is wearing a pink shirt and is holding a bouquet of flowers in its left paw. The background of the image is a light pink color with a pattern of small flowers.',
	            '',
	            '',
	            'coral',
	            'Chinese: XianErTi',
	            '世界儿歌日',
	            '[0.08753799392097264, 0.11124620060790273, 0.8231003039513678, 0.22066869300911854]',
	            1,
	            5
	        ],
	        [
	            'examples/festival.webp',
	            'The image shows a nighttime cityscape with a dark sky filled with stars. The city is illuminated with various lights, suggesting a bustling urban environment. The image is framed by a black border, and there is a watermark or logo in the bottom right corner, which appears to be a stylized letter \'C\'. The overall style of the image is illustrative and colorful, with a focus on the contrast between the dark sky and the brightly lit city.',
	            '',
	            '',
	            'white, white',
	            'Japanese: MotoyaMinchoMiyabiStd-W4; Japanese: JackeyFont',
	            '12月30日**********除夜を祝う',
	            '[0.4121580547112462, 0.08145896656534954, 0.17386018237082068, 0.02006079027355623]; [0.33069908814589666, 0.29908814589665655, 0.34772036474164136, 0.31550151975683893]',
	            42,
	            5
	        ],
	        [
	            'examples/woman.webp',
	            'The image is a digital illustration featuring a character that appears to be a young woman with a serene expression. She is depicted with long, flowing hair and is wearing a traditional East Asian-style dress with a floral pattern. The dress is predominantly in shades of blue and green, with a hint of pink.\nThe character is seated on a bed of cherry blossoms, which are scattered around her. The blossoms are in full bloom, with their delicate pink petals and white stamens.\n\nThe background of the image is a pale, soft blue sky with a few wispy clouds. The overall atmosphere of the image is one of tranquility and serenity.',
	            '',
	            '',
	            'saddlebrown, black, black, saddlebrown',
	            'Korean: SeH-CB; Korean: SourceHanSerifSC-SemiBold; Korean: Canva_YoonGulimPro740; Korean: TDTDLatteOTF',
	            '전문 메이크업 아티스트 아름다운 한복 무료 촬영********행사 기간 5월 6일-5월 8일 행사 장소 상사호 고전 마을******한복 동호회********한복 체험 국조 문화 창작전',
	            '[0.2674772036474164, 0.5465045592705167, 0.1264437689969605, 0.09787234042553192]; [0.2662613981762918, 0.3161094224924012, 0.17446808510638298, 0.15987841945288753]; [0.2650455927051672, 0.10395136778115502, 0.42613981762917935, 0.07598784194528875]; [0.26261398176291795, 0.20547112462006079, 0.3009118541033435, 0.041945288753799395]',
	            317314747,
	            5
	        ],
	        [
	            'examples/elephant.webp',
	            'The image features a large gray elephant sitting in a field of flowers, holding a smaller elephant in its arms. The scene is quite serene and picturesque, with the two elephants being the main focus of the image. The field is filled with various flowers, creating a beautiful and vibrant backdrop for the elephants.',
	            'Cards and invitations',
	            'Light green, orange, Illustration, watercolor, playful, Baby shower invitation, baby boy shower invitation, baby boy, welcoming baby boy, koala baby shower invitation, baby shower invitation for baby shower, baby boy invitation, background, playful baby shower card, baby shower, card, newborn, born, Baby Shirt Baby Shower Invitation',
	            'peru, olive, olivedrab, peru, peru, peru',
	            'Russian: TTRamillas-Italic; Russian: StadioNow-TextItalic; Russian: RubikOne-Regular; Russian: HeroLight-Regular; Russian: BebasNeueBold; Russian: SloopScriptPro-Regular',
	            'Ответьте, пожалуйста, на номер +123-456-7890********Оливия Уилсон******Детский душ******Пожалуйста, присоединитесь к нам для******В ЧЕСТЬ********23 ноября, 2021 | 15:00 Отели Фоже',
	            '[0.07112462006079028, 0.6462006079027356, 0.3373860182370821, 0.026747720364741642]; [0.07051671732522796, 0.38662613981762917, 0.37264437689969604, 0.059574468085106386]; [0.07234042553191489, 0.15623100303951368, 0.6547112462006079, 0.12401215805471125]; [0.0662613981762918, 0.06747720364741641, 0.3981762917933131, 0.035866261398176294]; [0.07051671732522796, 0.31550151975683893, 0.22006079027355624, 0.03951367781155015]; [0.06990881458966565, 0.48328267477203646, 0.39878419452887537, 0.1094224924012158]',
	            7,
	            5
	        ],
	        [
	            'examples/earth.webp',
	            'The image features a green and blue globe with a factory on top of it. The factory is surrounded by trees, giving the impression of a harmonious coexistence between the industrial structure and the natural environment. The globe is prominently displayed in the center of the image, with the factory and trees surrounding it.',
	            'Posters',
	            'green, modern, earth, world, planet, ecology, background, globe, environment, day, space, map, concept, global, light, hour, energy, power, protect, illustration',
	            'white, white',
	            'Portuguese: Gliker-Regular; Portuguese: Amsterdam-Three',
	            'A TERRA É O QUE TODOS NÓS TEMOS EM COMUM**********Dia da Terra',
	            '[0.2875379939209726, 0.2753799392097264, 0.4243161094224924, 0.060790273556231005]; [ 0.2978723404255319, 0.16170212765957448, 0.40364741641337387, 0.10638297872340426]',
	            1208360201,
	            5
	        ],
	    ]

	    with gr.Blocks(
	        title="Glyph-ByT5: A Customized Text Encoder for Accurate Visual Text Rendering",
	        css=css,
	    ) as demo:

	        gr.HTML(html)
	        with gr.Tab("Multilingual"):
	            build_input_block_multilingual(color_idx_list, multi_font_idx_list, multi_examples)

	        with gr.Tab("English"):
	            build_input_block(color_idx_list, eng_font_idx_list, eng_examples)

	    demo.queue()
	    demo.launch()

	# Script entry point: build and launch the demo only when this file is
	# executed directly, not when it is imported as a module.
	if __name__ == "__main__":
	    main()