canvas-studio

Starting on Zero

App Files Files Community

canvas-studio / app.py

ginipick

Update app.py

0e99320 verified 27 days ago

raw

history blame

23.6 kB

	import tempfile
	import time
	from collections.abc import Sequence
	from typing import Any, cast
	import os
	import gc
	from huggingface_hub import login, hf_hub_download

	import gradio as gr
	import numpy as np
	import pillow_heif
	import spaces
	import torch
	from gradio_image_annotation import image_annotator
	from gradio_imageslider import ImageSlider
	from PIL import Image
	from pymatting.foreground.estimate_foreground_ml import estimate_foreground_ml
	from refiners.fluxion.utils import no_grad
	from refiners.solutions import BoxSegmenter
	from transformers import GroundingDinoForObjectDetection, GroundingDinoProcessor
	from diffusers import FluxPipeline
	from transformers import pipeline, AutoTokenizer, AutoModelForSeq2SeqLM

	#############################################################
	# Memory cleanup helper
	def clear_memory():
	    """Release unreferenced Python objects and cached CUDA memory.

	    Runs the garbage collector, then empties the CUDA cache on device 0
	    when CUDA is available. Cleanup is best-effort: a failure while touching
	    CUDA is reported on stdout and never propagated to the caller.
	    """
	    gc.collect()
	    try:
	        if torch.cuda.is_available():
	            with torch.cuda.device(0):  # explicitly target device 0
	                torch.cuda.empty_cache()
	    except Exception as e:
	        # Must not crash a request, but a silent `pass` hides real CUDA
	        # problems, so at least report what went wrong.
	        print(f"Warning: could not clear CUDA cache: {e}")

	#############################################################
	# GPU configuration (Zero GPU environment)
	# Pick cuda:0 when CUDA is reported as available, otherwise fall back to CPU.
	device = torch.device("cuda:0") if torch.cuda.is_available() else torch.device("cpu")
	if torch.cuda.is_available():
	    try:
	        with torch.cuda.device(0):
	            torch.cuda.empty_cache()
	            # Enable cuDNN autotuning and TF32 matmuls.
	            torch.backends.cudnn.benchmark = True
	            torch.backends.cuda.matmul.allow_tf32 = True
	    except Exception:
	        # Tuning is optional; keep starting up if it cannot be applied.
	        print("Warning: Could not configure CUDA settings")

	#############################################################
	# Translation model initialisation (runs on CPU)
	# Korean -> English model used by translate_to_english.
	model_name = "Helsinki-NLP/opus-mt-ko-en"
	tokenizer = AutoTokenizer.from_pretrained(model_name)
	# Keep the translation model on the CPU
	model = AutoModelForSeq2SeqLM.from_pretrained(model_name).to("cpu")
	# device=-1 keeps the transformers pipeline on the CPU as well
	translator = pipeline("translation", model=model, tokenizer=tokenizer, device=-1)

	def translate_to_english(text: str) -> str:
	    """Translate Korean text to English; return other text unchanged.

	    The translator is only invoked when ``text`` contains at least one
	    character in the range '가'..'힣'. Any failure is printed and the
	    original ``text`` is returned instead.
	    """
	    hangul_first, hangul_last = ord('가'), ord('힣')
	    try:
	        contains_korean = any(hangul_first <= ord(ch) <= hangul_last for ch in text)
	        if not contains_korean:
	            return text
	        translated = translator(text, max_length=128)[0]['translation_text']
	        print(f"Translated '{text}' to '{translated}'")
	        return translated
	    except Exception as e:
	        print(f"Translation error: {str(e)}")
	        return text

	# Pixel-space box expressed as (xmin, ymin, xmax, ymax).
	BoundingBox = tuple[int, int, int, int]

	# Register the HEIF and AVIF openers provided by pillow_heif
	# (presumably so PIL can decode uploads in those formats — confirm).
	pillow_heif.register_heif_opener()
	pillow_heif.register_avif_opener()

	#############################################################
	# Hugging Face token setup
	# The token is read from the environment and is mandatory: start-up aborts
	# when it is missing or when the login call fails.
	HF_TOKEN = os.getenv("HF_TOKEN")
	if HF_TOKEN is None:
	    raise ValueError("Please set the HF_TOKEN environment variable")

	try:
	    login(token=HF_TOKEN)
	except Exception as e:
	    # Chain the original failure so its traceback stays attached.
	    raise ValueError(f"Failed to login to Hugging Face: {str(e)}") from e

	#############################################################
	# Object segmentation model initialisation
	# The segmenter is constructed on the CPU, then its `device` attribute and
	# its model are switched to the globally selected `device`.
	segmenter = BoxSegmenter(device="cpu")
	segmenter.device = device
	segmenter.model = segmenter.model.to(device=segmenter.device)

	# Grounding DINO processor and model (float32 weights) used by gd_detect
	# for text-prompted object detection.
	gd_model_path = "IDEA-Research/grounding-dino-base"
	gd_processor = GroundingDinoProcessor.from_pretrained(gd_model_path)
	gd_model = GroundingDinoForObjectDetection.from_pretrained(gd_model_path, torch_dtype=torch.float32)
	gd_model = gd_model.to(device=device)
	assert isinstance(gd_model, GroundingDinoForObjectDetection)

	#############################################################
	# FLUX pipeline initialisation (for Zero GPU)
	# `token` replaces the deprecated `use_auth_token` keyword in both
	# diffusers' from_pretrained and huggingface_hub's hf_hub_download.
	pipe = FluxPipeline.from_pretrained(
	    "black-forest-labs/FLUX.1-dev",
	    torch_dtype=torch.float16,
	    token=HF_TOKEN,
	)
	pipe.enable_attention_slicing(slice_size="auto")
	# Load the Hyper-SD 8-step LoRA and fuse it into the base weights.
	pipe.load_lora_weights(
	    hf_hub_download(
	        "ByteDance/Hyper-SD",
	        "Hyper-FLUX.1-dev-8steps-lora.safetensors",
	        token=HF_TOKEN,
	    )
	)
	pipe.fuse_lora(lora_scale=0.125)
	try:
	    if torch.cuda.is_available():
	        pipe = pipe.to("cuda:0")  # explicitly move to cuda:0
	except Exception as e:
	    # Keep the CPU pipeline if the move fails.
	    print(f"Warning: Could not move pipeline to CUDA: {str(e)}")

	#############################################################
	# Timer class
	class timer:
	    """Context manager that prints when a named step starts and how long it took.

	    ``__enter__`` returns nothing, so it is meant to be used as
	    ``with timer("label"):`` without an ``as`` target.
	    """

	    def __init__(self, method_name="timed process"):
	        # Label used in both printed messages.
	        self.method = method_name

	    def __enter__(self):
	        self.start = time.time()
	        print(f"{self.method} starts")

	    def __exit__(self, exc_type, exc_val, exc_tb):
	        # Returns None, so an exception raised inside the block propagates.
	        end = time.time()
	        print(f"{self.method} took {str(round(end - self.start, 2))}s")

	#############################################################
	# Utility functions
	def bbox_union(bboxes: Sequence[list[int]]) -> BoundingBox | None:
	    """Return the smallest box that encloses every box in ``bboxes``.

	    Args:
	        bboxes: Boxes given as ``[xmin, ymin, xmax, ymax]`` integer lists.

	    Returns:
	        The enclosing ``(xmin, ymin, xmax, ymax)`` tuple, or ``None`` when
	        ``bboxes`` is empty.
	    """
	    if not bboxes:
	        return None
	    for bbox in bboxes:
	        assert len(bbox) == 4
	        assert all(isinstance(x, int) for x in bbox)
	    # Transpose once so each coordinate column is reduced in a single pass.
	    xmins, ymins, xmaxs, ymaxs = zip(*bboxes)
	    return (min(xmins), min(ymins), max(xmaxs), max(ymaxs))

	def corners_to_pixels_format(bboxes: torch.Tensor, width: int, height: int) -> torch.Tensor:
	    """Round corner-format boxes to int32 pixels and clamp them to the image.

	    The last dimension of ``bboxes`` holds (x1, y1, x2, y2). X values are
	    clamped to ``[0, width]`` and Y values to ``[0, height]``.
	    """
	    pixel_boxes = bboxes.round().to(torch.int32)
	    left, top, right, bottom = pixel_boxes.unbind(-1)
	    clamped = (
	        left.clamp(0, width),
	        top.clamp(0, height),
	        right.clamp(0, width),
	        bottom.clamp(0, height),
	    )
	    return torch.stack(clamped, dim=-1)

	def gd_detect(img: Image.Image, prompt: str) -> BoundingBox | None:
	    """Detect objects matching ``prompt`` and return the union of their boxes.

	    Args:
	        img: Image to search.
	        prompt: Text description of the object(s) to find.

	    Returns:
	        A single ``(xmin, ymin, xmax, ymax)`` box covering every detection,
	        clamped to the image, or ``None`` when nothing is detected.
	    """
	    # Grounding DINO expects lowercase queries ending in a single dot.
	    # Prompts (especially translated ones) may already end in "." or be
	    # capitalised, which previously produced queries such as "A dog..".
	    query = prompt.strip().lower().rstrip(".").rstrip() + "."
	    inputs = gd_processor(images=img, text=query, return_tensors="pt").to(device=device)
	    with no_grad():
	        outputs = gd_model(**inputs)
	    width, height = img.size
	    # target_sizes is (height, width), the reverse of PIL's size order.
	    results: dict[str, Any] = gd_processor.post_process_grounded_object_detection(
	        outputs,
	        inputs["input_ids"],
	        target_sizes=[(height, width)],
	    )[0]
	    assert "boxes" in results and isinstance(results["boxes"], torch.Tensor)
	    bboxes = corners_to_pixels_format(results["boxes"].cpu(), width, height)
	    return bbox_union(bboxes.numpy().tolist())

	def apply_mask(img: Image.Image, mask_img: Image.Image, defringe: bool = True) -> Image.Image:
	    """Cut ``img`` out with ``mask_img`` and return an RGBA image.

	    Both images must have the same size. When ``defringe`` is true the
	    foreground colours are first re-estimated with
	    ``estimate_foreground_ml``, using the mask as alpha.
	    """
	    assert img.size == mask_img.size
	    colour = img.convert("RGB")
	    alpha_mask = mask_img.convert("L")
	    if defringe:
	        # Work in floats scaled to [0, 1] for the foreground estimator.
	        rgb = np.asarray(colour) / 255.0
	        alpha = np.asarray(alpha_mask) / 255.0
	        foreground = cast(np.ndarray[Any, np.dtype[np.uint8]], estimate_foreground_ml(rgb, alpha))
	        colour = Image.fromarray((foreground * 255).astype("uint8"))
	    cutout = Image.new("RGBA", colour.size)
	    cutout.paste(colour, (0, 0), alpha_mask)
	    return cutout

	def adjust_size_to_multiple_of_8(width: int, height: int) -> tuple[int, int]:
	    """Round both dimensions up to the next multiple of 8."""

	    def round_up(side: int) -> int:
	        return ((side + 7) // 8) * 8

	    return round_up(width), round_up(height)

	def calculate_dimensions(aspect_ratio: str, base_size: int = 512) -> tuple[int, int]:
	if aspect_ratio == "1:1":
	return base_size, base_size
	elif aspect_ratio == "16:9":
	return base_size * 16 // 9, base_size
	elif aspect_ratio == "9:16":
	return base_size, base_size * 16 // 9
	elif aspect_ratio == "4:3":
	return base_size * 4 // 3, base_size
	return base_size, base_size

	#############################################################
	# Background generation (adapted for Zero GPU)
	@spaces.GPU(duration=20)
	def generate_background(prompt: str, aspect_ratio: str) -> Image.Image:
	    """Generate a background image for ``prompt`` with the FLUX pipeline.

	    The size comes from ``aspect_ratio``, is capped at 768 px on the longest
	    side and is rounded up to multiples of 8. On failure a white
	    placeholder is returned instead of raising: at the requested size if
	    the pipeline call fails, 512x512 for any other error.
	    """
	    try:
	        width, height = adjust_size_to_multiple_of_8(*calculate_dimensions(aspect_ratio))

	        max_size = 768
	        longest_side = max(width, height)
	        if longest_side > max_size:
	            ratio = max_size / longest_side
	            width, height = adjust_size_to_multiple_of_8(int(width * ratio), int(height * ratio))

	        with timer("Background generation"):
	            try:
	                with torch.inference_mode():
	                    output = pipe(
	                        prompt=prompt,
	                        width=width,
	                        height=height,
	                        num_inference_steps=8,
	                        guidance_scale=4.0
	                    )
	                    image = output.images[0]
	            except Exception as e:
	                print(f"Pipeline error: {str(e)}")
	                return Image.new('RGB', (width, height), 'white')
	        return image
	    except Exception as e:
	        print(f"Background generation error: {str(e)}")
	        return Image.new('RGB', (512, 512), 'white')

	def create_position_grid():
	    """Return the HTML markup for a 3x3 grid of position buttons.

	    Each button carries a ``data-pos`` attribute naming one of the nine
	    anchor positions; the bottom-center button is marked ``data-default``.
	    NOTE(review): no caller is visible in this file — confirm it is still used.
	    """
	    return """
	<div class="position-grid" style="display: grid; grid-template-columns: repeat(3, 1fr); gap: 10px; width: 150px; margin: auto;">
	<button class="position-btn" data-pos="top-left">↖</button>
	<button class="position-btn" data-pos="top-center">↑</button>
	<button class="position-btn" data-pos="top-right">↗</button>
	<button class="position-btn" data-pos="middle-left">←</button>
	<button class="position-btn" data-pos="middle-center">•</button>
	<button class="position-btn" data-pos="middle-right">→</button>
	<button class="position-btn" data-pos="bottom-left">↙</button>
	<button class="position-btn" data-pos="bottom-center" data-default="true">↓</button>
	<button class="position-btn" data-pos="bottom-right">↘</button>
	</div>
	"""

	def calculate_object_position(position: str, bg_size: tuple[int, int], obj_size: tuple[int, int]) -> tuple[int, int]:
	    """Return the top-left paste offset that anchors an object on a background.

	    ``position`` is ``"<row>-<column>"`` with row in top/middle/bottom and
	    column in left/center/right. Unknown values fall back to
	    ``"bottom-center"``.
	    """
	    bg_width, bg_height = bg_size
	    obj_width, obj_height = obj_size

	    # Free space left over on each axis once the object is placed.
	    slack_x = bg_width - obj_width
	    slack_y = bg_height - obj_height

	    columns = {"left": 0, "center": slack_x // 2, "right": slack_x}
	    rows = {"top": 0, "middle": slack_y // 2, "bottom": slack_y}
	    anchors = {
	        f"{row}-{column}": (x, y)
	        for row, y in rows.items()
	        for column, x in columns.items()
	    }

	    return anchors.get(position, anchors["bottom-center"])

	def resize_object(image: Image.Image, scale_percent: float) -> Image.Image:
	    """Return ``image`` scaled to ``scale_percent`` percent of its size.

	    Each side is kept at a minimum of 1 pixel: a small image at a low scale
	    would otherwise truncate to 0 and make ``resize`` fail.
	    """
	    width = max(1, int(image.width * scale_percent / 100))
	    height = max(1, int(image.height * scale_percent / 100))
	    return image.resize((width, height), Image.Resampling.LANCZOS)

	def combine_with_background(foreground: Image.Image, background: Image.Image,
	                            position: str = "bottom-center", scale_percent: float = 100) -> Image.Image:
	    """Paste a scaled ``foreground`` onto ``background`` at an anchor position.

	    The background is converted to RGBA (leaving the argument untouched) and
	    the scaled foreground is pasted using itself as the mask.
	    """
	    canvas = background.convert('RGBA')
	    sprite = resize_object(foreground, scale_percent)
	    offset_x, offset_y = calculate_object_position(position, canvas.size, sprite.size)
	    canvas.paste(sprite, (offset_x, offset_y), sprite)
	    return canvas

	#############################################################
	# GPU processing (adapted for Zero GPU)
	@spaces.GPU(duration=30)
	def _gpu_process(img: Image.Image, prompt: str | BoundingBox | None) -> tuple[Image.Image, BoundingBox | None, list[str]]:
	    """Locate the object (if needed) and segment it.

	    Args:
	        img: Source image.
	        prompt: A text prompt (resolved to a box with ``gd_detect``), or a
	            box / ``None`` that is passed to the segmenter as-is.

	    Returns:
	        ``(mask, bbox, time_log)`` where ``time_log`` holds the elapsed
	        time of each stage as text.

	    Raises:
	        gr.Error: when a text prompt yields no detection. Any other
	            exception is printed and re-raised unchanged.
	    """
	    time_log: list[str] = []
	    try:
	        if isinstance(prompt, str):
	            t0 = time.time()
	            bbox = gd_detect(img, prompt)
	            time_log.append(f"detect: {time.time() - t0}")
	            if not bbox:
	                print(time_log[0])
	                raise gr.Error("No object detected")
	        else:
	            bbox = prompt
	        t0 = time.time()
	        mask = segmenter(img, bbox)
	        time_log.append(f"segment: {time.time() - t0}")
	        return mask, bbox, time_log
	    except Exception as e:
	        print(f"GPU process error: {str(e)}")
	        raise

	#############################################################
	# End-to-end processing
	def _process(img: Image.Image, prompt: str | BoundingBox | None, bg_prompt: str | None = None, aspect_ratio: str = "1:1") -> tuple[tuple[Image.Image, Image.Image, Image.Image], gr.DownloadButton]:
	    """Extract the prompted object and optionally generate a background.

	    Args:
	        img: Input image; downscaled so its longest side is at most 1024 px.
	        prompt: Text prompt or bounding box forwarded to ``_gpu_process``.
	        bg_prompt: When given, a background is generated and returned as the
	            "combined" image. Otherwise the cut-out is composited on white.
	        aspect_ratio: Aspect-ratio label for the generated background.

	    Returns:
	        ``((input, combined, cutout), download_button)``. The button points
	        at a temporary PNG of ``combined``, which is not deleted here.

	    Raises:
	        gr.Error: on any failure.
	    """
	    try:
	        # Cap the input image size
	        max_size = 1024
	        if img.width > max_size or img.height > max_size:
	            ratio = max_size / max(img.width, img.height)
	            # max(1, ...) stops extreme aspect ratios collapsing a side to 0.
	            new_size = (max(1, int(img.width * ratio)), max(1, int(img.height * ratio)))
	            img = img.resize(new_size, Image.Resampling.LANCZOS)

	        try:
	            if torch.cuda.is_available():
	                current_device = torch.cuda.current_device()
	                with torch.cuda.device(current_device):
	                    torch.cuda.empty_cache()
	        except Exception as e:
	            print(f"CUDA memory management failed: {e}")

	        with torch.cuda.amp.autocast(enabled=torch.cuda.is_available()):
	            mask, _bbox, _time_log = _gpu_process(img, prompt)
	            masked_alpha = apply_mask(img, mask, defringe=True)

	        if bg_prompt:
	            # Compositing onto the background is left to the caller.
	            combined = generate_background(bg_prompt, aspect_ratio)
	        else:
	            combined = Image.alpha_composite(Image.new("RGBA", masked_alpha.size, "white"), masked_alpha)

	        clear_memory()

	        # Reserve a temp path, close the handle, then write the PNG by name.
	        with tempfile.NamedTemporaryFile(delete=False, suffix=".png") as temp:
	            temp_path = temp.name
	        combined.save(temp_path)
	        return (img, combined, masked_alpha), gr.DownloadButton(value=temp_path, interactive=True)
	    except gr.Error as e:
	        # Already user-facing (e.g. "No object detected"): do not re-wrap it.
	        clear_memory()
	        print(f"Processing error: {str(e)}")
	        raise
	    except Exception as e:
	        clear_memory()
	        print(f"Processing error: {str(e)}")
	        raise gr.Error(f"Processing failed: {str(e)}") from e

	def on_change_bbox(prompts: dict[str, Any] | None):
	    """Return an update that enables the target component only when ``prompts`` is set."""
	    return gr.update(interactive=prompts is not None)

	def on_change_prompt(img: Image.Image | None, prompt: str | None, bg_prompt: str | None = None):
	    """Return an update enabling the target component when an image and a prompt exist.

	    ``bg_prompt`` is accepted but does not affect the result.
	    """
	    return gr.update(interactive=bool(img and prompt))

	def process_prompt(img: Image.Image, prompt: str, bg_prompt: str | None = None,
	                   aspect_ratio: str = "1:1", position: str = "bottom-center",
	                   scale_percent: float = 100) -> tuple[Image.Image, Image.Image]:
	    """Handle the "Process" button: extract an object and optionally place it on a generated background.

	    Args:
	        img: Uploaded image.
	        prompt: Description of the object to extract (Korean is translated).
	        bg_prompt: Optional description of the background to generate.
	        aspect_ratio: Aspect-ratio label for the generated background.
	        position: Anchor for the object on the background.
	        scale_percent: Object size as a percentage of its extracted size.

	    Returns:
	        ``(combined_image, extracted_object)``. If compositing fails, the
	        plain background is returned as the combined image.

	    Raises:
	        gr.Error: when inputs are missing or processing fails.
	    """
	    try:
	        # `prompt` can be None or empty when the textbox was never filled in.
	        if img is None or not prompt or not prompt.strip():
	            raise gr.Error("Please provide both image and prompt")

	        print(f"Processing with position: {position}, scale: {scale_percent}")

	        try:
	            prompt = translate_to_english(prompt)
	            if bg_prompt:
	                bg_prompt = translate_to_english(bg_prompt)
	        except Exception as e:
	            print(f"Translation error (continuing with original text): {str(e)}")

	        results, _ = _process(img, prompt, bg_prompt, aspect_ratio)

	        if bg_prompt:
	            try:
	                # results[1] is the generated background, results[2] the cut-out.
	                combined = combine_with_background(
	                    foreground=results[2],
	                    background=results[1],
	                    position=position,
	                    scale_percent=scale_percent
	                )
	                print(f"Combined image created with position: {position}")
	                return combined, results[2]
	            except Exception as e:
	                print(f"Combination error: {str(e)}")
	                return results[1], results[2]

	        return results[1], results[2]
	    except gr.Error as e:
	        # Already user-facing: re-raise as-is instead of wrapping it again.
	        print(f"Error in process_prompt: {str(e)}")
	        raise
	    except Exception as e:
	        print(f"Error in process_prompt: {str(e)}")
	        raise gr.Error(str(e)) from e
	    finally:
	        clear_memory()

	def process_bbox(img: Image.Image, box_input: str) -> tuple[Image.Image, Image.Image]:
	    """Extract the object inside a user-supplied bounding box.

	    Args:
	        img: Uploaded image.
	        box_input: Text of the form ``[xmin, ymin, xmax, ymax]``.

	    Returns:
	        ``(combined_image, extracted_object)`` from ``_process``.

	    Raises:
	        gr.Error: when inputs are missing, the box cannot be parsed, or
	            processing fails.
	    """
	    import ast

	    try:
	        if img is None or not box_input or not box_input.strip():
	            raise gr.Error("Please provide both image and bounding box coordinates")

	        try:
	            # SECURITY: box_input is raw user text. It used to go through
	            # eval(), which executes arbitrary code; literal_eval only
	            # accepts Python literals.
	            coords = ast.literal_eval(box_input.strip())
	            if not isinstance(coords, list) or len(coords) != 4:
	                raise ValueError("Invalid box format")
	            bbox = tuple(int(x) for x in coords)
	        except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
	            raise gr.Error("Invalid box format. Please provide [xmin, ymin, xmax, ymax]") from None

	        results, _ = _process(img, bbox)
	        return results[1], results[2]
	    except gr.Error:
	        # Already user-facing: do not wrap it a second time.
	        raise
	    except Exception as e:
	        raise gr.Error(str(e)) from e

	def update_process_button(img, prompt):
	    """Enable and highlight the process button once an image and a prompt are present."""
	    ready = bool(img and prompt)
	    variant = "primary" if ready else "secondary"
	    return gr.update(interactive=ready, variant=variant)

	def update_box_button(img, box_input):
	    """Enable the box button only when ``box_input`` parses as a 4-element list.

	    Returns a disabled, "secondary" update when the image or text is missing
	    or the text is not a valid list literal.
	    """
	    import ast

	    disabled = gr.update(interactive=False, variant="secondary")
	    if not (img and box_input):
	        return disabled
	    try:
	        # SECURITY: never eval() raw user text; literal_eval only accepts
	        # Python literals and cannot run code.
	        coords = ast.literal_eval(box_input)
	    except (ValueError, TypeError, SyntaxError, MemoryError, RecursionError):
	        return disabled
	    if isinstance(coords, list) and len(coords) == 4:
	        return gr.update(interactive=True, variant="primary")
	    return disabled

	#############################################################
	# CSS definitions
	# Stylesheet passed to gr.Blocks(css=...): hides the footer and styles the
	# title banner, input/output groups, primary buttons and position buttons.
	css = """
	footer {display: none}
	.main-title {
	text-align: center;
	margin: 2em 0;
	padding: 1em;
	background: #f7f7f7;
	border-radius: 10px;
	}
	.main-title h1 {
	color: #2196F3;
	font-size: 2.5em;
	margin-bottom: 0.5em;
	}
	.main-title p {
	color: #666;
	font-size: 1.2em;
	}
	.container {
	max-width: 1200px;
	margin: auto;
	padding: 20px;
	}
	.tabs {
	margin-top: 1em;
	}
	.input-group {
	background: white;
	padding: 1em;
	border-radius: 8px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}
	.output-group {
	background: white;
	padding: 1em;
	border-radius: 8px;
	box-shadow: 0 2px 4px rgba(0,0,0,0.1);
	}
	button.primary {
	background: #2196F3;
	border: none;
	color: white;
	padding: 0.5em 1em;
	border-radius: 4px;
	cursor: pointer;
	transition: background 0.3s ease;
	}
	button.primary:hover {
	background: #1976D2;
	}
	.position-btn {
	transition: all 0.3s ease;
	}
	.position-btn:hover {
	background-color: #e3f2fd;
	}
	.position-btn.selected {
	background-color: #2196F3;
	color: white;
	}
	"""

	#############################################################
	# UI layout
	# NOTE(review): the container nesting below was reconstructed from a
	# flattened copy of the file — confirm the hierarchy against the running app.
	with gr.Blocks(theme=gr.themes.Soft(), css=css) as demo:
	    gr.HTML("""
	<div class="main-title">
	<h1>🎨GiniGen Canvas</h1>
	<p>AI Integrated Image Creator: Extract objects, generate backgrounds, and adjust ratios and positions to create complete images with AI.</p>
	</div>
	""")
	    with gr.Row():
	        # Left column: inputs and controls
	        with gr.Column(scale=1):
	            input_image = gr.Image(
	                type="pil",
	                label="Upload Image",
	                interactive=True
	            )
	            text_prompt = gr.Textbox(
	                label="Object to Extract",
	                placeholder="Enter what you want to extract...",
	                interactive=True
	            )
	            with gr.Row():
	                bg_prompt = gr.Textbox(
	                    label="Background Prompt (optional)",
	                    placeholder="Describe the background...",
	                    interactive=True,
	                    scale=3
	                )
	                aspect_ratio = gr.Dropdown(
	                    choices=["1:1", "16:9", "9:16", "4:3"],
	                    value="1:1",
	                    label="Aspect Ratio",
	                    interactive=True,
	                    visible=True,
	                    scale=1
	                )
	            # Placement controls; hidden until a background prompt is entered
	            # (see update_controls below).
	            with gr.Row(visible=False) as object_controls:
	                with gr.Column(scale=1):
	                    with gr.Row():
	                        # Currently selected anchor, consumed by process_prompt.
	                        position = gr.State(value="bottom-center")
	                        btn_top_left = gr.Button("↖")
	                        btn_top_center = gr.Button("↑")
	                        btn_top_right = gr.Button("↗")
	                    with gr.Row():
	                        btn_middle_left = gr.Button("←")
	                        btn_middle_center = gr.Button("•")
	                        btn_middle_right = gr.Button("→")
	                    with gr.Row():
	                        btn_bottom_left = gr.Button("↙")
	                        btn_bottom_center = gr.Button("↓")
	                        btn_bottom_right = gr.Button("↘")
	                with gr.Column(scale=1):
	                    scale_slider = gr.Slider(
	                        minimum=10,
	                        maximum=200,
	                        value=50,
	                        step=5,
	                        label="Object Size (%)"
	                    )
	            # Starts disabled; update_process_button enables it once an image
	            # and a prompt are present.
	            process_btn = gr.Button(
	                "Process",
	                variant="primary",
	                interactive=False
	            )
	            # Click handlers for each position button
	            def update_position(new_position):
	                return new_position
	            btn_top_left.click(fn=lambda: update_position("top-left"), outputs=position)
	            btn_top_center.click(fn=lambda: update_position("top-center"), outputs=position)
	            btn_top_right.click(fn=lambda: update_position("top-right"), outputs=position)
	            btn_middle_left.click(fn=lambda: update_position("middle-left"), outputs=position)
	            btn_middle_center.click(fn=lambda: update_position("middle-center"), outputs=position)
	            btn_middle_right.click(fn=lambda: update_position("middle-right"), outputs=position)
	            btn_bottom_left.click(fn=lambda: update_position("bottom-left"), outputs=position)
	            btn_bottom_center.click(fn=lambda: update_position("bottom-center"), outputs=position)
	            btn_bottom_right.click(fn=lambda: update_position("bottom-right"), outputs=position)
	        # Right column: results
	        with gr.Column(scale=1):
	            with gr.Row():
	                combined_image = gr.Image(
	                    label="Combined Result",
	                    show_download_button=True,
	                    type="pil",
	                    height=512
	                )
	            with gr.Row():
	                extracted_image = gr.Image(
	                    label="Extracted Object",
	                    show_download_button=True,
	                    type="pil",
	                    height=256
	                )
	    # Event bindings
	    # Re-evaluate the process button whenever the image or the prompt changes.
	    input_image.change(
	        fn=update_process_button,
	        inputs=[input_image, text_prompt],
	        outputs=process_btn,
	        queue=False
	    )
	    text_prompt.change(
	        fn=update_process_button,
	        inputs=[input_image, text_prompt],
	        outputs=process_btn,
	        queue=False
	    )
	    # Show the aspect-ratio dropdown and the placement controls only while a
	    # background prompt is present.
	    def update_controls(bg_prompt):
	        is_visible = bool(bg_prompt)
	        return [
	            gr.update(visible=is_visible),
	            gr.update(visible=is_visible),
	        ]
	    bg_prompt.change(
	        fn=update_controls,
	        inputs=bg_prompt,
	        outputs=[aspect_ratio, object_controls],
	        queue=False
	    )
	    process_btn.click(
	        fn=process_prompt,
	        inputs=[
	            input_image,
	            text_prompt,
	            bg_prompt,
	            aspect_ratio,
	            position,
	            scale_slider
	        ],
	        outputs=[combined_image, extracted_image],
	        queue=True
	    )
	    # Example section
	    with gr.Accordion("Show Example", open=True):
	        gr.Markdown("### Example")
	        with gr.Row():
	            with gr.Column():
	                gr.Markdown("Upload Image(aa1.png)")
	                gr.Image(value="aa1.png", label="Upload")
	            with gr.Column():
	                gr.Markdown("Cut Object (aa2.png)<br>(Prompt: 'text')", elem_classes="center")
	                gr.Image(value="aa2.png", label="Object")
	            with gr.Column():
	                gr.Markdown("Generated Image (aa3.png)<br>(Background Prompt: 'alps mountain')", elem_classes="center")
	                gr.Image(value="aa3.png", label="Output")
	# Enable the request queue (max_size=5), then start the server on all
	# interfaces at port 7860 without a public share link.
	demo.queue(max_size=5)
	demo.launch(
	    server_name="0.0.0.0",
	    server_port=7860,
	    share=False,
	    max_threads=2
	)