import gradio as gr
import spaces
import torch
from transformers import MistralForCausalLM, LlamaTokenizer, pipeline

repo_id = "appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf"

# Load the model in bfloat16 with FlashAttention 2 for faster inference.
model = MistralForCausalLM.from_pretrained(
    repo_id,
    attn_implementation="flash_attention_2",
    torch_dtype=torch.bfloat16,
)
tokenizer = LlamaTokenizer.from_pretrained(repo_id)

pipe = pipeline(
    "text-generation",
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
)


@spaces.GPU
def upsample(prompt):
    # Wrap the short caption in the model's chat template so the
    # instruction-tuned upsampler sees the expected prompt format.
    template = tokenizer.apply_chat_template(
        [{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
        tokenize=False,
        add_generation_prompt=True,
    )
    # Low-temperature sampling keeps the expansion faithful to the input caption.
    response = pipe(
        template,
        min_p=0.01,
        top_p=0.95,
        top_k=40,
        do_sample=True,
        temperature=0.2,
        max_new_tokens=512,
        pad_token_id=tokenizer.eos_token_id,
        return_full_text=False,  # return only the generated caption, not the chat template
    )
    return response[0]["generated_text"]


demo = gr.Interface(
    title="NVIDIA Cosmos 🌌 Prompt Upsampler",
    description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B.

This space uses the Hugging Face Transformers version at bfloat16 precision.

[[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
    fn=upsample,
    inputs=gr.Text(
        label="Prompt",
        value="A dog playing with a ball.",
    ),
    outputs=gr.Text(
        label="Upsampled Prompt",
        interactive=False,
    ),
)

demo.launch()