benjamin-paine committed on
Commit 08b845b · verified · 1 Parent(s): 43980be

Update app.py

Files changed (1)
  1. app.py +20 -31
app.py CHANGED
@@ -1,47 +1,36 @@
  import gradio as gr
  import spaces
  import torch
- import torch.amp as amp
 
- from transformers import MistralForCausalLM, LlamaTokenizer, pipeline
+ from huggingface_hub import hf_hub_download
+ from llama_cpp import Llama
 
- repo_id = "appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf"
-
- model = MistralForCausalLM.from_pretrained(
-     repo_id,
-     attn_implementation="flash_attention_2",
-     torch_dtype=torch.bfloat16
- )
- tokenizer = LlamaTokenizer.from_pretrained(repo_id)
- pipe = pipeline(
-     "text-generation",
-     model=model,
-     tokenizer=tokenizer,
-     torch_dtype=torch.bfloat16
+ model = hf_hub_download(
+     "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
+     filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
  )
+ llama = None
 
  @spaces.GPU
  def upsample(prompt):
-     template = tokenizer.apply_chat_template(
-         [{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
-         tokenize=False,
-         add_generation_prompt=True
-     )
-     response = pipe(
-         template,
-         min_p=0.01,
-         top_p=0.95,
-         top_k=40,
-         do_sample=True,
-         temperature=0.2,
-         max_new_tokens=512,
-         pad_token_id=tokenizer.eos_token_id
+     global llama  # without this, the assignment below makes llama a local and the None check raises UnboundLocalError
+     if llama is None:
+         llama = Llama(
+             model,
+             n_gpu_layers=-1,
+             n_ctx=2048,
+             verbose=False
+         )
+     completion = llama.create_chat_completion(
+         messages=[{"role": "user", "content": f"Upsample the short caption to a long caption: {prompt}"}],
+         max_tokens=512,
      )
-     return response[0]["generated_text"]
+     response = completion["choices"][0]["message"]["content"]
+     return response
 
  demo = gr.Interface(
      title="NVIDIA Cosmos 🌌 Prompt Upsampler",
-     description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses the HuggingFace Transformers version at bfloat16 precision.
+     description="""Upsample prompts using NVIDIA's 12B Cosmos model, based on Mistral NeMo 12B. This space uses llama.cpp with the Q8_0 quantized GGUF checkpoint.
 
  [[cosmos]](https://huggingface.co/nvidia/Cosmos-1.0-Prompt-Upsampler-12B-Text2World) [[transformers]](https://huggingface.co/appmana/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf) [[gguf]](https://huggingface.co/mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF)""",
      fn=upsample,
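
For reference, the new inference path can be exercised outside the Space as a standalone script. This is a minimal sketch, not part of the commit: it assumes llama-cpp-python and huggingface_hub are installed, that the llama.cpp build supports GPU offload (otherwise drop n_gpu_layers=-1 and it runs on CPU), and it carries over the sampler settings (temperature, top_p, top_k, min_p) from the removed Transformers pipeline; the committed code omits them and so uses llama.cpp defaults. The example caption is illustrative.

# Standalone sketch of the Space's llama.cpp path (assumption: not the committed app.py).
from huggingface_hub import hf_hub_download
from llama_cpp import Llama

# Fetch the Q8_0 GGUF checkpoint from the Hub (cached locally after the first run).
model_path = hf_hub_download(
    "mradermacher/Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf-GGUF",
    filename="Cosmos-1.0-Prompt-Upsampler-12B-Text2World-hf.Q8_0.gguf",
)

# n_gpu_layers=-1 offloads all layers to the GPU; n_ctx bounds prompt plus output tokens.
llama = Llama(model_path, n_gpu_layers=-1, n_ctx=2048, verbose=False)

completion = llama.create_chat_completion(
    messages=[{"role": "user", "content": "Upsample the short caption to a long caption: a cat on a beach"}],
    max_tokens=512,
    # Sampler settings carried over from the old Transformers pipeline (assumption).
    temperature=0.2,
    top_p=0.95,
    top_k=40,
    min_p=0.01,
)
print(completion["choices"][0]["message"]["content"])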