"""Gradio demo: upsample short prompts with NVIDIA's Cosmos prompt upsampler.

The Q8_0 GGUF checkpoint is fetched with ``hf_hub_download`` at start-up and
run through llama.cpp (``llama_cpp.Llama``) on the first request.
"""
import re
from typing import Tuple

# NOTE(review): the relative order of the third-party imports is kept exactly
# as in the original file (`spaces` before `torch` and `llama_cpp`). ZeroGPU
# reportedly wants `spaces` imported before CUDA-using packages -- confirm
# before letting an import sorter reorder these.
import gradio as gr
import spaces
import torch
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Local path of the checkpoint file, as returned by hf_hub_download.
model = hf_hub_download(
    "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
    filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
)

# Llama instance, created lazily by `upsample` on its first call.
llama = None

# Outermost quoted span: first opening quote (straight or U+201C) up to the
# last closing quote (straight or U+201D). The curly quotes are written as
# escapes so the pattern cannot be damaged by a file re-encoding, and DOTALL
# lets the caption span several lines.
_QUOTED_CAPTION_RE = re.compile(r'["\u201c](.*)["\u201d]', re.DOTALL)


def trim_upsampled_caption(prompt: str) -> str:
    """Return the quoted caption contained in *prompt*, if there is one.

    Cosmos tends to put the output caption in quotes, sometimes surrounded by
    extra text. The text between the first opening quote and the last closing
    quote is returned; straight (") and curly quotes are both recognised.

    Args:
        prompt: Raw model response.

    Returns:
        The text inside the outermost pair of quotes, or *prompt* unchanged
        when it is empty/None or contains no quoted span.
    """
    if not prompt:
        # Nothing to search (also covers a None response); pass it through.
        return prompt
    found = _QUOTED_CAPTION_RE.search(prompt)
    if found is None:
        # Best effort: with no quoted span, the raw text is the caption.
        return prompt
    return found.group(1)


@spaces.GPU
def upsample(prompt: str) -> Tuple[str, str]:
    """Run the Cosmos prompt upsampler on *prompt*.

    The module-level ``llama`` instance is created on the first call and
    reused by later calls.

    Args:
        prompt: Short caption to expand.

    Returns:
        A ``(trimmed, raw)`` pair: the response with surrounding quotes/extra
        text removed by ``trim_upsampled_caption``, and the unmodified
        response.
    """
    global llama
    if llama is None:
        # n_gpu_layers=-1: llama-cpp-python documents this as "offload all
        # layers to the GPU".
        llama = Llama(model, n_gpu_layers=-1, n_ctx=2048, verbose=False)

    completion = llama.create_chat_completion(
        messages=[
            {
                "role": "user",
                "content": f"Upsample the short caption to a long caption: {prompt}",
            }
        ],
        max_tokens=512,
    )
    response = completion["choices"][0]["message"]["content"]
    return trim_upsampled_caption(response), response


# Example output shown in the result boxes before the first run. The raw
# variant is derived from the trimmed one so the two cannot drift apart.
_EXAMPLE_CAPTION = (
    "In a sun-drenched backyard, a playful golden retriever bounds joyfully "
    "across the lush green grass, its tail wagging with excitement. "
    "The dog, adorned with a vibrant blue collar, is captivated by a bright "
    "yellow ball, which it cradles gently in its mouth, its eyes sparkling "
    "with anticipation. "
    "The camera, held steady, captures the dog's animated expressions as it "
    "trots towards the viewer, its movements fluid and graceful. "
    "The scene is bathed in warm, golden-hour light, enhancing the vibrant "
    "colors of the dog's fur and the lush surroundings. "
    "As the dog playfully tosses the ball into the air, it leaps with "
    "enthusiasm, showcasing its agility and playful spirit. "
    "The serene backdrop of a white fence and a distant tree line frames "
    "this heartwarming moment, inviting viewers to share in the simple joy "
    "of a dog's playful antics."
)
_EXAMPLE_RAW_CAPTION = f"\u201c{_EXAMPLE_CAPTION}\u201d"

demo = gr.Interface(
    title="NVIDIA Cosmos 🌌 Prompt Upsampler",
    description=(
        "Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral "
        "NeMo 12B. This space uses llama.cpp with the Q8-0 quantized GGUF "
        "checkpoint. "
        "[[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) "
        "[[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) "
        "[[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)"
    ),
    fn=upsample,
    inputs=gr.Text(
        label="Prompt",
        value="A dog playing with a ball.",
    ),
    outputs=[
        gr.Text(
            label="Trimmed Upsampled Prompt",
            interactive=False,
            value=_EXAMPLE_CAPTION,
            show_copy_button=True,
        ),
        gr.Text(
            label="Raw Upsampled Prompt",
            interactive=False,
            value=_EXAMPLE_RAW_CAPTION,
            show_copy_button=True,
        ),
    ],
)

demo.launch()