benjamin-paine committed on
Commit 08b845b · verified · 1 Parent(s): 43980be

Update app.py

Files changed (1)
  1. app.py +20 -31
app.py CHANGED
@@ -1,47 +1,36 @@
  import gradio as gr
  import spaces
  import torch
- import torch.amp as amp
 
- from transformers import MistralForCausalLM, LlamaTokenizer, pipeline
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
 
- repo_id = "appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf"
-
- model = MistralForCausalLM.from_pretrained(
-     repo_id,
-     attn_implementation="flash_attention_2",
-     torch_dtype=torch.bfloat16
- )
- tokenizer = LlamaTokenizer.from_pretrained(repo_id)
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     torch_dtype=torch.bfloat16
+ model = hf_hub_download(
+     "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
+     filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
  )
+ llama = None
 
  @spaces.GPU
  def upsample(prompt):
-     template = tokenizer.apply_chat_template(
-         [{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
-         tokenize=False,
-         add_generation_prompt=True
-     )
-     response = pipe(
-         template,
-         min_p=0.01,
-         top_p=0.95,
-         top_k=40,
-         do_sample=True,
-         temperature=0.2,
-         max_new_tokens=512,
-         pad_token_id=tokenizer.eos_token_id
+     global llama  # without this, the assignment below makes llama a local and the None check raises UnboundLocalError
+     if llama is None:
+         llama = Llama(
+             model,
+             n_gpu_layers=-1,
+             n_ctx=2048,
+             verbose=False
+         )
+     completion = llama.create_chat_completion(
+         messages=[{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
+         max_tokens=512,
      )
-     return response[0]["generated_text"]
+     response = completion["choices"][0]["message"]["content"]
+     return response
 
  demo = gr.Interface(
      title="NVIDIA Cosmos 🌌 Prompt Upsampler",
-     description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses the HuggingFace Transformers version at bfloat16 precision.
+     description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses llama.cpp with the Q8_0 quantized GGUF checkpoint.
 
  [[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
      fn=upsample,
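
For reference, the new inference path can be exercised outside the Space as a standalone script. This is a minimal sketch, not part of the commit: it assumes llama-cpp-python and huggingface_hub are installed, that the llama.cpp build supports GPU offload (otherwise drop n_gpu_layers=-1 and it runs on CPU), and it carries over the sampler settings (temperature, top_p, top_k, min_p) from the removed Transformers pipeline; the committed code omits them and so uses llama.cpp defaults. The example caption is illustrative.

# Standalone sketch of the Space's llama.cpp path (assumption: not the committed app.py).
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the Q8_0 GGUF checkpoint from the Hub (cached locally after the first run).
model_path = hf_hub_download(
    "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
    filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
)

# n_gpu_layers=-1 offloads all layers to the GPU; n_ctx bounds prompt plus output tokens.
llama = Llama(model_path, n_gpu_layers=-1, n_ctx=2048, verbose=False)

completion = llama.create_chat_completion(
    messages=[{"role": "user", "content": "Upsample the short caption to a long caption: a cat on a beach"}],
    max_tokens=512,
    # Sampler settings carried over from the old Transformers pipeline (assumption).
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    min_p=0.01,
)
print(completion["choices"][0]["message"]["content"])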