"""Gradio demo: pose-guided image-to-image generation.

Loads Stable Diffusion v1.5 together with the OpenPose ControlNet and exposes
a small web UI that takes a text prompt, a target image and a pose image.
"""
import os

# Disable oneDNN custom operations. This is set before the heavy third-party
# imports so the variable is already in place if any of them pulls in
# TensorFlow.
os.environ['TF_ENABLE_ONEDNN_OPTS'] = '0'

from typing import Optional

import torch
from PIL import Image
from diffusers import (
    ControlNetModel,
    StableDiffusionControlNetImg2ImgPipeline,
    StableDiffusionControlNetPipeline,
)
import gradio as gr

CONTROLNET_MODEL_ID = "lllyasviel/control_v11p_sd15_openpose"
BASE_MODEL_ID = "runwayml/stable-diffusion-v1-5"
IMAGE_SIZE = (512, 512)
NUM_INFERENCE_STEPS = 50

# Clear PyTorch cache
torch.cuda.empty_cache()

# Check if CUDA is available
device = "cuda" if torch.cuda.is_available() else "cpu"
if device == "cuda":
    print("CUDA is available. Device count:", torch.cuda.device_count())
    print("Current device:", torch.cuda.current_device())
    print("Device name:", torch.cuda.get_device_name(torch.cuda.current_device()))
else:
    print("CUDA is not available. Using CPU.")

# Half precision is only used on the GPU; on CPU fall back to float32 so the
# CPU code path announced above stays usable.
dtype = torch.float16 if device == "cuda" else torch.float32

# Load ControlNet model with OpenPose pre-trained weights from Hugging Face
controlnet = ControlNetModel.from_pretrained(CONTROLNET_MODEL_ID, torch_dtype=dtype)

# Load the Stable Diffusion model. The img2img ControlNet pipeline is the
# variant whose call accepts both a starting `image` and a separate
# `control_image`, which is what generate_image() passes.
pipe = StableDiffusionControlNetImg2ImgPipeline.from_pretrained(
    BASE_MODEL_ID,
    controlnet=controlnet,
    torch_dtype=dtype,
).to(device)


def generate_image(
    prompt: str,
    target_image: Optional[Image.Image],
    pose_image: Optional[Image.Image],
) -> Image.Image:
    """Generate an image from ``target_image`` guided by ``prompt`` and ``pose_image``.

    Args:
        prompt: Text prompt passed to the pipeline.
        target_image: PIL image used as the img2img starting point. ``None``
            (no upload) is rejected.
        pose_image: PIL image used as the ControlNet conditioning image.
            ``None`` (no upload) is rejected.
            NOTE(review): the OpenPose ControlNet presumably expects an
            already-rendered pose skeleton here, not a raw photo - confirm
            with whoever supplies the inputs.

    Returns:
        The first generated PIL image.

    Raises:
        gr.Error: If an input image is missing or generation fails, so the
            message is shown in the Gradio UI instead of an empty output.
    """
    if target_image is None or pose_image is None:
        raise gr.Error("Please provide both a target image and a pose image.")

    # Normalise the mode first (uploads may be RGBA or palette images), then
    # resize both inputs to the resolution used for generation.
    target_image = target_image.convert("RGB").resize(IMAGE_SIZE)
    pose_image = pose_image.convert("RGB").resize(IMAGE_SIZE)

    try:
        # Generate image with ControlNet
        output = pipe(
            prompt=prompt,
            image=target_image,
            control_image=pose_image,
            num_inference_steps=NUM_INFERENCE_STEPS,
        )
    except Exception as e:
        # Top-level UI boundary: log for the operator, surface to the user.
        print(f"Error during image generation: {e}")
        raise gr.Error(f"Image generation failed: {e}") from e

    # Pipeline outputs expose the generated PIL images via `.images`.
    return output.images[0]


# Setup Gradio Interface
interface = gr.Interface(
    fn=generate_image,
    inputs=[
        gr.Textbox(label="Prompt"),
        gr.Image(label="Target Image", type="pil"),
        gr.Image(label="Pose Image (Reference)", type="pil"),
    ],
    outputs=gr.Image(label="Generated Image"),
)

# Launch the interface only when run as a script, not on import.
if __name__ == "__main__":
    interface.launch()