import gradio as gr
import spaces
import torch
from transformers import MistralForCausalLM, LlamaTokenizer, pipeline

repo_id = "appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf"

# Load the model in bfloat16 with FlashAttention 2 for faster inference.
model = MistralForCausalLM.from_pretrained(
    repo_id,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)
tokenizer = LlamaTokenizer.from_pretrained(repo_id)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
)


@spaces.GPU
def upsample(prompt):
    # Wrap the short caption in the model's chat template so the
    # instruction-tuned upsampler sees the expected prompt format.
    template = tokenizer.apply_chat_template(
        [{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # Low-temperature sampling keeps the expansion faithful to the input caption.
    response = pipe(
        template,
        min_p=0.01,
        top_p=0.95,
        top_k=40,
        do_sample=True,
        temperature=0.2,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
        return_full_text=False,  # return only the generated caption, not the chat template
    )
    return response[0]["generated_text"]


demo = gr.Interface(
    title="NVIDIA Cosmos 🌌 Prompt Upsampler",
    description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B.

This space uses the Hugging Face Transformers version at bfloat16 precision.

[[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
    fn=upsample,
    inputs=gr.Text(
        label="Prompt",
        value="A dog playing with a ball.",
    ),
    outputs=gr.Text(
        label="Upsampled Prompt",
        interactive=False,
    ),
)

demo.launch()