tsqn committed on
Commit d54fea7 · verified · 1 Parent(s): 7cb486f

Update app.py

Files changed (1)
  1. app.py +127 -93
app.py CHANGED
@@ -1,10 +1,11 @@
  """
- THis is the main file for the gradio web demo. It uses the CogVideoX-5B model to generate videos gradio web demo.
  set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.

  Usage:
  OpenAI_API_KEY=your_openai_api_key OPENAI_BASE_URL=https://api.openai.com/v1 python inference/gradio_web_demo.py
  """

  import math
  import os
@@ -17,8 +18,11 @@ import tempfile
  import imageio_ffmpeg
  import gradio as gr
  import torch
  from PIL import Image
  from diffusers import (
      CogVideoXPipeline,
      CogVideoXDPMScheduler,
      CogVideoXVideoToVideoPipeline,
@@ -27,26 +31,49 @@ from diffusers import (
  )
  from diffusers.utils import load_video, load_image
  from datetime import datetime, timedelta
-
  from diffusers.image_processor import VaeImageProcessor
- from openai import OpenAI
  import moviepy.editor as mp
  import utils
- from rife_model import load_rife_model, rife_inference_with_latents
- from huggingface_hub import hf_hub_download, snapshot_download
  import gc

  device = "cuda" if torch.cuda.is_available() else "cpu"

- hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
- snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
-
- pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX-5b", torch_dtype=torch.bfloat16).to("cpu")
  pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

  i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
-     "THUDM/CogVideoX-5b-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
  )

  # pipe.transformer.to(memory_format=torch.channels_last)
  # pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
@@ -56,8 +83,8 @@ i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
  os.makedirs("./output", exist_ok=True)
  os.makedirs("./gradio_tmp", exist_ok=True)

- upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
- frame_interpolation_model = load_rife_model("model_rife")

  sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
@@ -147,54 +174,54 @@ def center_crop_resize(input_video_path, target_width=720, target_height=480):
      return temp_video_path


- def convert_prompt(prompt: str, retry_times: int = 3) -> str:
-     if not os.environ.get("OPENAI_API_KEY"):
-         return prompt
-     client = OpenAI()
-     text = prompt.strip()
-
-     for i in range(retry_times):
-         response = client.chat.completions.create(
-             messages=[
-                 {"role": "system", "content": sys_prompt},
-                 {
-                     "role": "user",
-                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "a girl is on the beach"',
-                 },
-                 {
-                     "role": "assistant",
-                     "content": "A radiant woman stands on a deserted beach, arms outstretched, wearing a beige trench coat, white blouse, light blue jeans, and chic boots, against a backdrop of soft sky and sea. Moments later, she is seen mid-twirl, arms exuberant, with the lighting suggesting dawn or dusk. Then, she runs along the beach, her attire complemented by an off-white scarf and black ankle boots, the tranquil sea behind her. Finally, she holds a paper airplane, her pose reflecting joy and freedom, with the ocean's gentle waves and the sky's soft pastel hues enhancing the serene ambiance.",
-                 },
-                 {
-                     "role": "user",
-                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "A man jogging on a football field"',
-                 },
-                 {
-                     "role": "assistant",
-                     "content": "A determined man in athletic attire, including a blue long-sleeve shirt, black shorts, and blue socks, jogs around a snow-covered soccer field, showcasing his solitary exercise in a quiet, overcast setting. His long dreadlocks, focused expression, and the serene winter backdrop highlight his dedication to fitness. As he moves, his attire, consisting of a blue sports sweatshirt, black athletic pants, gloves, and sneakers, grips the snowy ground. He is seen running past a chain-link fence enclosing the playground area, with a basketball hoop and children's slide, suggesting a moment of solitary exercise amidst the empty field.",
-                 },
-                 {
-                     "role": "user",
-                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : " A woman is dancing, HD footage, close-up"',
-                 },
-                 {
-                     "role": "assistant",
-                     "content": "A young woman with her hair in an updo and wearing a teal hoodie stands against a light backdrop, initially looking over her shoulder with a contemplative expression. She then confidently makes a subtle dance move, suggesting rhythm and movement. Next, she appears poised and focused, looking directly at the camera. Her expression shifts to one of introspection as she gazes downward slightly. Finally, she dances with confidence, her left hand over her heart, symbolizing a poignant moment, all while dressed in the same teal hoodie against a plain, light-colored background.",
-                 },
-                 {
-                     "role": "user",
-                     "content": f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{text}"',
-                 },
-             ],
-             model="glm-4-plus",
-             temperature=0.01,
-             top_p=0.7,
-             stream=False,
-             max_tokens=200,
-         )
-         if response.choices:
-             return response.choices[0].message.content
-     return prompt


  def infer(
@@ -213,14 +240,18 @@ def infer(
      if video_input is not None:
          video = load_video(video_input)[:49]  # Limit to 49 frames
          pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
-             "THUDM/CogVideoX-5b",
-             transformer=pipe.transformer,
-             vae=pipe.vae,
              scheduler=pipe.scheduler,
              tokenizer=pipe.tokenizer,
-             text_encoder=pipe.text_encoder,
              torch_dtype=torch.bfloat16,
          ).to(device)
          video_pt = pipe_video(
              video=video,
              prompt=prompt,
@@ -238,12 +269,12 @@ def infer(
          torch.cuda.empty_cache()
      elif image_input is not None:
          pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
-             "THUDM/CogVideoX-5b-I2V",
              transformer=i2v_transformer,
-             vae=pipe.vae,
              scheduler=pipe.scheduler,
              tokenizer=pipe.tokenizer,
-             text_encoder=pipe.text_encoder,
              torch_dtype=torch.bfloat16,
          ).to(device)
          image_input = Image.fromarray(image_input).resize(size=(720, 480))  # Convert to PIL
@@ -311,17 +342,17 @@ examples_images = [["example_images/beach.png"], ["example_images/street.png"],
  with gr.Blocks() as demo:
      gr.Markdown("""
          <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
-             CogVideoX-5B Huggingface Space🤗
          </div>
          <div style="text-align: center;">
-             <a href="https://huggingface.co/THUDM/CogVideoX-5B">🤗 5B(T2V) Model Hub</a> |
-             <a href="https://huggingface.co/THUDM/CogVideoX-5B-I2V">🤗 5B(I2V) Model Hub</a> |
              <a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
              <a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
          </div>
          <div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
              <span>If the Space is too busy, duplicate it to use privately</span>
-             <a href="https://huggingface.co/spaces/THUDM/CogVideoX-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
              margin-left: .75em;
              "></a>
          </div>
@@ -340,23 +371,23 @@ with gr.Blocks() as demo:
                  examples_component_videos = gr.Examples(examples_videos, inputs=[video_input], cache_examples=False)
              prompt = gr.Textbox(label="Prompt (Less than 200 Words)", placeholder="Enter your prompt here", lines=5)

-             with gr.Row():
-                 gr.Markdown(
-                     "✨Upon pressing the enhanced prompt button, we will use [GLM-4 Model](https://github.com/THUDM/GLM-4) to polish the prompt and overwrite the original one."
-                 )
-                 enhance_button = gr.Button("✨ Enhance Prompt(Optional)")
              with gr.Group():
                  with gr.Column():
                      with gr.Row():
                          seed_param = gr.Number(
                              label="Inference Seed (Enter a positive number, -1 for random)", value=-1
                          )
-                     with gr.Row():
-                         enable_scale = gr.Checkbox(label="Super-Resolution (720 × 480 -> 2880 × 1920)", value=False)
-                         enable_rife = gr.Checkbox(label="Frame Interpolation (8fps -> 16fps)", value=False)
-                     gr.Markdown(
-                         "✨In this demo, we use [RIFE](https://github.com/hzwer/ECCV2022-RIFE) for frame interpolation and [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) for upscaling(Super-Resolution).<br>&nbsp;&nbsp;&nbsp;&nbsp;The entire process is based on open-source solutions."
-                     )

              generate_button = gr.Button("🎬 Generate Video")
@@ -431,14 +462,15 @@ with gr.Blocks() as demo:
      </table>
  """)

      def generate(
          prompt,
          image_input,
          video_input,
          video_strength,
          seed_value,
-         scale_status,
-         rife_status,
          progress=gr.Progress(track_tqdm=True)
      ):
          latents, seed = infer(
@@ -446,15 +478,15 @@ with gr.Blocks() as demo:
              image_input,
              video_input,
              video_strength,
-             num_inference_steps=50,  # NOT Changed
              guidance_scale=7.0,  # NOT Changed
              seed=seed_value,
              progress=progress,
          )
-         if scale_status:
-             latents = utils.upscale_batch_and_concatenate(upscale_model, latents, device)
-         if rife_status:
-             latents = rife_inference_with_latents(frame_interpolation_model, latents)

          batch_size = latents.shape[0]
          batch_video_frames = []
@@ -474,18 +506,20 @@ with gr.Blocks() as demo:

          return video_path, video_update, gif_update, seed_update

-     def enhance_prompt_func(prompt):
-         return convert_prompt(prompt, retry_times=1)

      generate_button.click(
          generate,
-         inputs=[prompt, image_input, video_input, strength, seed_param, enable_scale, enable_rife],
          outputs=[video_output, download_video_button, download_gif_button, seed_text],
      )

-     enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])
      video_input.upload(resize_if_unfit, inputs=[video_input], outputs=[video_input])

  if __name__ == "__main__":
      demo.queue(max_size=15)
      demo.launch()

  """
+ This is the main file for the Gradio web demo. It uses the CogVideoX1.5-5B model to generate videos.
  set environment variable OPENAI_API_KEY to use the OpenAI API to enhance the prompt.

  Usage:
  OpenAI_API_KEY=your_openai_api_key OPENAI_BASE_URL=https://api.openai.com/v1 python inference/gradio_web_demo.py
  """
+ import spaces

  import math
  import os
 
  import imageio_ffmpeg
  import gradio as gr
  import torch
+ from torchao.quantization import quantize_, int8_weight_only
+ from transformers import T5EncoderModel
  from PIL import Image
  from diffusers import (
+     AutoencoderKLCogVideoX,
      CogVideoXPipeline,
      CogVideoXDPMScheduler,
      CogVideoXVideoToVideoPipeline,
 
  )
  from diffusers.utils import load_video, load_image
  from datetime import datetime, timedelta
  from diffusers.image_processor import VaeImageProcessor
+ #from openai import OpenAI
  import moviepy.editor as mp
  import utils
+ #from rife_model import load_rife_model, rife_inference_with_latents
+ #from huggingface_hub import hf_hub_download, snapshot_download
  import gc

  device = "cuda" if torch.cuda.is_available() else "cpu"

+ #hf_hub_download(repo_id="ai-forever/Real-ESRGAN", filename="RealESRGAN_x4.pth", local_dir="model_real_esran")
+ #snapshot_download(repo_id="AlexWortega/RIFE", local_dir="model_rife")
+ quantization = int8_weight_only
+
+ transformer = CogVideoXTransformer3DModel.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="transformer", torch_dtype=torch.bfloat16)
+ text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+ vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX1.5-5B", subfolder="vae", torch_dtype=torch.bfloat16)
+ quantize_(transformer, quantization())
+ quantize_(text_encoder, quantization())
+ # quantize_(vae, quantization())
+
+ pipe = CogVideoXPipeline.from_pretrained(
+     "THUDM/CogVideoX1.5-5B",
+     text_encoder=text_encoder,
+     transformer=transformer,
+     vae=vae,
+     torch_dtype=torch.bfloat16
+ ).to("cpu")
  pipe.scheduler = CogVideoXDPMScheduler.from_config(pipe.scheduler.config, timestep_spacing="trailing")

+ pipe.enable_model_cpu_offload()
+ pipe.vae.enable_tiling()
+ pipe.vae.enable_slicing()
+
  i2v_transformer = CogVideoXTransformer3DModel.from_pretrained(
+     "THUDM/CogVideoX1.5-5B-I2V", subfolder="transformer", torch_dtype=torch.bfloat16
  )
+ i2v_text_encoder = T5EncoderModel.from_pretrained("THUDM/CogVideoX1.5-5B-I2V", subfolder="text_encoder", torch_dtype=torch.bfloat16)
+ i2v_vae = AutoencoderKLCogVideoX.from_pretrained("THUDM/CogVideoX1.5-5B-I2V", subfolder="vae", torch_dtype=torch.bfloat16)
+
+ quantize_(i2v_transformer, quantization())
+ quantize_(i2v_text_encoder, quantization())
+ # quantize_(i2v_vae, quantization())

  # pipe.transformer.to(memory_format=torch.channels_last)
  # pipe.transformer = torch.compile(pipe.transformer, mode="max-autotune", fullgraph=True)
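
The torchao calls above replace the weights of the transformer and text encoder with int8 tensors before the pipelines are assembled, which roughly halves their weight memory relative to bf16. For reference, a minimal standalone sketch of that pattern, shown on the T2V transformer only and not part of this diff (it assumes torchao and diffusers are installed and the checkpoint can be downloaded):

import torch
from torchao.quantization import quantize_, int8_weight_only
from diffusers import CogVideoXTransformer3DModel

# Load one sub-model in bf16, then swap its Linear weights to int8 in place.
transformer = CogVideoXTransformer3DModel.from_pretrained(
    "THUDM/CogVideoX1.5-5B", subfolder="transformer", torch_dtype=torch.bfloat16
)
quantize_(transformer, int8_weight_only())
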
 
  os.makedirs("./output", exist_ok=True)
  os.makedirs("./gradio_tmp", exist_ok=True)

+ #upscale_model = utils.load_sd_upscale("model_real_esran/RealESRGAN_x4.pth", device)
+ #frame_interpolation_model = load_rife_model("model_rife")

  sys_prompt = """You are part of a team of bots that creates videos. You work with an assistant bot that will draw anything you say in square brackets.
 
      return temp_video_path


+ # def convert_prompt(prompt: str, retry_times: int = 3) -> str:
+ #     if not os.environ.get("OPENAI_API_KEY"):
+ #         return prompt
+ #     client = OpenAI()
+ #     text = prompt.strip()
+ #
+ #     for i in range(retry_times):
+ #         response = client.chat.completions.create(
+ #             messages=[
+ #                 {"role": "system", "content": sys_prompt},
+ #                 {
+ #                     "role": "user",
+ #                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "a girl is on the beach"',
+ #                 },
+ #                 {
+ #                     "role": "assistant",
+ #                     "content": "A radiant woman stands on a deserted beach, arms outstretched, wearing a beige trench coat, white blouse, light blue jeans, and chic boots, against a backdrop of soft sky and sea. Moments later, she is seen mid-twirl, arms exuberant, with the lighting suggesting dawn or dusk. Then, she runs along the beach, her attire complemented by an off-white scarf and black ankle boots, the tranquil sea behind her. Finally, she holds a paper airplane, her pose reflecting joy and freedom, with the ocean's gentle waves and the sky's soft pastel hues enhancing the serene ambiance.",
+ #                 },
+ #                 {
+ #                     "role": "user",
+ #                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : "A man jogging on a football field"',
+ #                 },
+ #                 {
+ #                     "role": "assistant",
+ #                     "content": "A determined man in athletic attire, including a blue long-sleeve shirt, black shorts, and blue socks, jogs around a snow-covered soccer field, showcasing his solitary exercise in a quiet, overcast setting. His long dreadlocks, focused expression, and the serene winter backdrop highlight his dedication to fitness. As he moves, his attire, consisting of a blue sports sweatshirt, black athletic pants, gloves, and sneakers, grips the snowy ground. He is seen running past a chain-link fence enclosing the playground area, with a basketball hoop and children's slide, suggesting a moment of solitary exercise amidst the empty field.",
+ #                 },
+ #                 {
+ #                     "role": "user",
+ #                     "content": 'Create an imaginative video descriptive caption or modify an earlier caption for the user input : " A woman is dancing, HD footage, close-up"',
+ #                 },
+ #                 {
+ #                     "role": "assistant",
+ #                     "content": "A young woman with her hair in an updo and wearing a teal hoodie stands against a light backdrop, initially looking over her shoulder with a contemplative expression. She then confidently makes a subtle dance move, suggesting rhythm and movement. Next, she appears poised and focused, looking directly at the camera. Her expression shifts to one of introspection as she gazes downward slightly. Finally, she dances with confidence, her left hand over her heart, symbolizing a poignant moment, all while dressed in the same teal hoodie against a plain, light-colored background.",
+ #                 },
+ #                 {
+ #                     "role": "user",
+ #                     "content": f'Create an imaginative video descriptive caption or modify an earlier caption in ENGLISH for the user input: "{text}"',
+ #                 },
+ #             ],
+ #             model="glm-4-plus",
+ #             temperature=0.01,
+ #             top_p=0.7,
+ #             stream=False,
+ #             max_tokens=200,
+ #         )
+ #         if response.choices:
+ #             return response.choices[0].message.content
+ #     return prompt


  def infer(
 
      if video_input is not None:
          video = load_video(video_input)[:49]  # Limit to 49 frames
          pipe_video = CogVideoXVideoToVideoPipeline.from_pretrained(
+             "THUDM/CogVideoX1.5-5B",
+             transformer=transformer,
+             vae=vae,
              scheduler=pipe.scheduler,
              tokenizer=pipe.tokenizer,
+             text_encoder=text_encoder,
              torch_dtype=torch.bfloat16,
          ).to(device)
+
+         pipe_video.enable_model_cpu_offload()
+         pipe_video.vae.enable_tiling()
+         pipe_video.vae.enable_slicing()
          video_pt = pipe_video(
              video=video,
              prompt=prompt,
 
          torch.cuda.empty_cache()
      elif image_input is not None:
          pipe_image = CogVideoXImageToVideoPipeline.from_pretrained(
+             "THUDM/CogVideoX1.5-5B-I2V",
              transformer=i2v_transformer,
+             vae=i2v_vae,
              scheduler=pipe.scheduler,
              tokenizer=pipe.tokenizer,
+             text_encoder=i2v_text_encoder,
              torch_dtype=torch.bfloat16,
          ).to(device)
          image_input = Image.fromarray(image_input).resize(size=(720, 480))  # Convert to PIL
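
Both the module-level pipe and the per-request pipelines built above rely on the same diffusers memory-saving switches: enable_model_cpu_offload() keeps each sub-model on the GPU only while it is executing, and VAE tiling/slicing bound peak memory during decoding. A minimal standalone sketch of those calls, with an illustrative prompt and step count that are not taken from this diff:

import torch
from diffusers import CogVideoXPipeline

pipe = CogVideoXPipeline.from_pretrained("THUDM/CogVideoX1.5-5B", torch_dtype=torch.bfloat16)
pipe.enable_model_cpu_offload()  # shuttle sub-models to the GPU only while each one runs
pipe.vae.enable_tiling()         # decode latents in spatial tiles to cap peak VRAM
pipe.vae.enable_slicing()        # decode the batch one sample at a time

video_frames = pipe(prompt="a lighthouse in a storm", num_inference_steps=20).frames[0]
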
 
  with gr.Blocks() as demo:
      gr.Markdown("""
          <div style="text-align: center; font-size: 32px; font-weight: bold; margin-bottom: 20px;">
+             CogVideoX1.5-5B Huggingface Space🤗
          </div>
          <div style="text-align: center;">
+             <a href="https://huggingface.co/THUDM/CogVideoX1.5-5B">🤗 5B(T2V) Model Hub</a> |
+             <a href="https://huggingface.co/THUDM/CogVideoX1.5-5B-I2V">🤗 5B(I2V) Model Hub</a> |
              <a href="https://github.com/THUDM/CogVideo">🌐 Github</a> |
              <a href="https://arxiv.org/pdf/2408.06072">📜 arxiv </a>
          </div>
          <div style="text-align: center;display: flex;justify-content: center;align-items: center;margin-top: 1em;margin-bottom: .5em;">
              <span>If the Space is too busy, duplicate it to use privately</span>
+             <a href="https://huggingface.co/spaces/tsqn/CogVideoX1.5-5B-Space?duplicate=true"><img src="https://huggingface.co/datasets/huggingface/badges/resolve/main/duplicate-this-space-lg.svg" width="160" style="
              margin-left: .75em;
              "></a>
          </div>
 
                  examples_component_videos = gr.Examples(examples_videos, inputs=[video_input], cache_examples=False)
              prompt = gr.Textbox(label="Prompt (Less than 200 Words)", placeholder="Enter your prompt here", lines=5)

+             # with gr.Row():
+             #     gr.Markdown(
+             #         "✨Upon pressing the enhanced prompt button, we will use [GLM-4 Model](https://github.com/THUDM/GLM-4) to polish the prompt and overwrite the original one."
+             #     )
+             #     enhance_button = gr.Button("✨ Enhance Prompt(Optional)")
              with gr.Group():
                  with gr.Column():
                      with gr.Row():
                          seed_param = gr.Number(
                              label="Inference Seed (Enter a positive number, -1 for random)", value=-1
                          )
+                     # with gr.Row():
+                     #     enable_scale = gr.Checkbox(label="Super-Resolution (720 × 480 -> 2880 × 1920)", value=False)
+                     #     enable_rife = gr.Checkbox(label="Frame Interpolation (8fps -> 16fps)", value=False)
+                     # gr.Markdown(
+                     #     "✨In this demo, we use [RIFE](https://github.com/hzwer/ECCV2022-RIFE) for frame interpolation and [Real-ESRGAN](https://github.com/xinntao/Real-ESRGAN) for upscaling(Super-Resolution).<br>&nbsp;&nbsp;&nbsp;&nbsp;The entire process is based on open-source solutions."
+                     # )

              generate_button = gr.Button("🎬 Generate Video")
 
      </table>
  """)

+     @spaces.GPU(duration=120)
      def generate(
          prompt,
          image_input,
          video_input,
          video_strength,
          seed_value,
+         # scale_status,
+         # rife_status,
          progress=gr.Progress(track_tqdm=True)
      ):
          latents, seed = infer(
 
              image_input,
              video_input,
              video_strength,
+             num_inference_steps=20,  # Changed from 50
              guidance_scale=7.0,  # NOT Changed
              seed=seed_value,
              progress=progress,
          )
+         # if scale_status:
+         #     latents = utils.upscale_batch_and_concatenate(upscale_model, latents, device)
+         # if rife_status:
+         #     latents = rife_inference_with_latents(frame_interpolation_model, latents)

          batch_size = latents.shape[0]
          batch_video_frames = []
 

          return video_path, video_update, gif_update, seed_update

+     # def enhance_prompt_func(prompt):
+     #     return convert_prompt(prompt, retry_times=1)

      generate_button.click(
          generate,
+         inputs=[prompt, image_input, video_input, strength, seed_param],
+         # inputs=[prompt, image_input, video_input, strength, seed_param, enable_scale, enable_rife],
          outputs=[video_output, download_video_button, download_gif_button, seed_text],
      )

+     # enhance_button.click(enhance_prompt_func, inputs=[prompt], outputs=[prompt])
      video_input.upload(resize_if_unfit, inputs=[video_input], outputs=[video_input])

  if __name__ == "__main__":
+     utils.install_packages()
      demo.queue(max_size=15)
      demo.launch()
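
The @spaces.GPU(duration=120) decorator added to generate() is what lets this demo run on a ZeroGPU Space: the process stays on CPU, and a GPU is attached only while the decorated callback executes. A minimal standalone sketch of the pattern, with a toy callback that is illustrative and not from this file:

import spaces
import gradio as gr

@spaces.GPU(duration=120)  # request a GPU for up to ~120 s while this callback runs
def run(prompt: str) -> str:
    # CUDA-heavy work (moving models to GPU, sampling, decoding) would go here;
    # outside this call the process remains on CPU.
    return f"would generate a video for: {prompt}"

demo = gr.Interface(fn=run, inputs="text", outputs="text")

if __name__ == "__main__":
    demo.launch()
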