Spaces:
Sleeping
Sleeping
import os | |
import math | |
import torch | |
import logging | |
import subprocess | |
import numpy as np | |
import torch.distributed as dist | |
# from torch._six import inf | |
from torch import inf | |
from PIL import Image | |
from typing import Union, Iterable | |
from collections import OrderedDict | |
from torch.utils.tensorboard import SummaryWriter | |
from typing import Dict | |
import torch_dct | |
from diffusers.utils import is_bs4_available, is_ftfy_available | |
import html | |
import re | |
import urllib.parse as ul | |
if is_bs4_available(): | |
from bs4 import BeautifulSoup | |
if is_ftfy_available(): | |
import ftfy | |
import torch.fft as fft | |
_tensor_or_tensors = Union[torch.Tensor, Iterable[torch.Tensor]] | |
################################################################################# | |
# Testing Utils # | |
################################################################################# | |
def find_model(model_name):
    """
    Load a checkpoint file and return one of the state dicts stored in it.

    :param model_name: path to the checkpoint file on disk
    :return: the value stored under the ``"ema"`` key when that key is
        present, otherwise the value stored under the ``"model"`` key
    :raises AssertionError: if ``model_name`` is not an existing file
    :raises KeyError: if the checkpoint has neither an ``"ema"`` nor a
        ``"model"`` entry
    """
    assert os.path.isfile(model_name), f'Could not find DiT checkpoint at {model_name}'

    # The map_location callable hands every storage back unchanged, so
    # nothing is moved to another device while loading.
    loaded = torch.load(model_name, map_location=lambda storage, loc: storage)

    if "ema" not in loaded:
        weights = loaded["model"]
        print("Using model ckpt!")
        return weights

    # Checkpoints written by train.py carry an "ema" entry; prefer it.
    print('Using ema ckpt!')
    return loaded["ema"]
def save_video_grid(video, nrow=None):
    """
    Tile a batch of videos into a single grid video.

    Despite the name, nothing is written to disk; the assembled grid is
    returned to the caller.

    :param video: batch whose shape unpacks as (b, t, h, w, c)
    :param nrow: number of grid rows; defaults to ceil(sqrt(b))
    :return: uint8 tensor of shape
        (t, (h + 1) * nrow + 1, (w + 1) * ncol + 1, c) where
        ncol = ceil(b / nrow); cells not covered by a video stay zero
    """
    num_videos, num_frames, height, width, channels = video.shape
    if nrow is None:
        nrow = math.ceil(math.sqrt(num_videos))
    ncol = math.ceil(num_videos / nrow)

    padding = 1
    cell_h = padding + height
    cell_w = padding + width
    canvas = torch.zeros(
        (num_frames, cell_h * nrow + padding, cell_w * ncol + padding, channels),
        dtype=torch.uint8,
    )

    for index, clip in enumerate(video):
        # Videos are laid out row-major across ncol columns.
        row, col = divmod(index, ncol)
        top = cell_h * row
        left = cell_w * col
        canvas[:, top:top + height, left:left + width] = clip
    return canvas
def save_videos_grid_tav(videos: torch.Tensor, path: str, rescale=False, nrow=None, fps=8):
    """
    Write a batch of videos to ``path`` as one tiled animation.

    Each time step is tiled with ``torchvision.utils.make_grid``, converted
    to a uint8 array, and the resulting frame list is handed to
    ``imageio.mimsave``. The parent directory of ``path`` is not created
    here.

    :param videos: 5-D tensor laid out as "b c t h w"
    :param path: output file path passed to ``imageio.mimsave``
    :param rescale: when true, map values with (x + 1) / 2 before scaling
        by 255 (i.e. treat the input as being in [-1, 1])
    :param nrow: ``nrow`` argument forwarded to ``make_grid``; defaults to
        ceil(sqrt(b))
    :param fps: frame rate forwarded to ``imageio.mimsave``
    """
    from einops import rearrange
    import imageio
    import torchvision

    batch_size, _, _, _, _ = videos.shape
    if nrow is None:
        nrow = math.ceil(math.sqrt(batch_size))

    def to_frame(step_batch):
        # make_grid yields (C, H, W); move channels last for image output.
        tiled = torchvision.utils.make_grid(step_batch, nrow=nrow)
        tiled = tiled.permute(1, 2, 0).squeeze(-1)
        if rescale:
            tiled = (tiled + 1.0) / 2.0  # -1,1 -> 0,1
        return (tiled * 255).numpy().astype(np.uint8)

    time_major = rearrange(videos, "b c t h w -> t b c h w")
    frames = [to_frame(step_batch) for step_batch in time_major]
    imageio.mimsave(path, frames, fps=fps)
################################################################################# | |
# MMCV Utils # | |
################################################################################# | |
def collect_env():
    """Collect the information of the running environments and print it.

    Gathers the base environment report from ``mmcv.utils.collect_env``,
    adds an ``'MMClassification'`` entry holding the first seven characters
    of ``get_git_hash()``, and prints every ``name: value`` pair followed by
    ``torch.cuda.get_arch_list()`` and ``torch.version.cuda``.

    Nothing is returned; the report goes to stdout only.
    """
    # Copyright (c) OpenMMLab. All rights reserved.
    # mmcv is imported lazily so the rest of this module does not need it.
    from mmcv.utils import collect_env as collect_base_env
    from mmcv.utils import get_git_hash

    env_info = collect_base_env()
    env_info['MMClassification'] = get_git_hash()[:7]

    for name, val in env_info.items():
        print(f'{name}: {val}')

    print(torch.cuda.get_arch_list())
    print(torch.version.cuda)
################################################################################# | |
# DCT Functions # | |
################################################################################# | |
def dct_low_pass_filter(dct_coefficients, percentage=0.3):
    """
    Builds a low pass mask for the given DCT coefficients.

    The mask is 1 in the leading ``percentage`` fraction of each of the
    last two dimensions (the low-frequency corner) and 0 everywhere else.
    The coefficients themselves are neither modified nor returned; the
    caller multiplies by the mask.

    :param dct_coefficients: tensor with at least two dimensions; the
        original code was annotated for a [b c f h w] layout, but any
        number of leading dimensions is accepted
    :param percentage: fraction of coefficients to keep along each of the
        last two dimensions (between 0 and 1)
    :return: mask tensor with the same shape, dtype and device as
        ``dct_coefficients``
    """
    # Cutoff indices for the last two dimensions (truncated toward zero).
    cutoff_rows = int(dct_coefficients.shape[-2] * percentage)
    cutoff_cols = int(dct_coefficients.shape[-1] * percentage)

    mask = torch.zeros_like(dct_coefficients)
    # Ellipsis indexing keeps the 5-D behaviour while also supporting other
    # ranks, instead of hard-coding exactly three leading dimensions.
    mask[..., :cutoff_rows, :cutoff_cols] = 1
    return mask
def normalize(tensor):
    """Min-max normalize a tensor into the [0, 1] range.

    The global minimum and maximum over all elements are used. When every
    element is equal (zero span) the result is all zeros rather than NaN.

    :param tensor: input tensor (must be non-empty, since ``min``/``max``
        are taken over all elements)
    :return: tensor of the same shape with values in [0, 1]
    """
    min_val = tensor.min()
    max_val = tensor.max()
    span = max_val - min_val
    # A constant tensor has zero span; dividing by it would produce NaN.
    # Substitute 1 in that case so the (all-zero) numerator passes through.
    safe_span = torch.where(span == 0, torch.ones_like(span), span)
    normalized = (tensor - min_val) / safe_span
    return normalized
def denormalize(tensor, max_val_target, min_val_target):
    """Map a tensor from the [0, 1] range back to [min_val_target, max_val_target].

    Note the argument order: the target maximum comes before the target
    minimum.

    :param tensor: values expected to lie in [0, 1]
    :param max_val_target: upper bound of the target range
    :param min_val_target: lower bound of the target range
    :return: ``tensor`` scaled by the target span and shifted by the
        target minimum
    """
    target_span = max_val_target - min_val_target
    return tensor * target_span + min_val_target
def exchanged_mixed_dct_freq(noise, base_content, LPF_3d, normalized=False):
    """
    Combine the low-frequency part of ``base_content`` with the
    high-frequency part of ``noise`` in the DCT domain.

    Both inputs are transformed with ``torch_dct.dct_3d(..., 'ortho')``.
    The base-content coefficients are multiplied by ``LPF_3d``, the noise
    coefficients by ``1 - LPF_3d``, the two products are summed, and the
    sum is transformed back with ``torch_dct.idct_3d(..., 'ortho')``.

    :param noise: tensor supplying the high-frequency coefficients
    :param base_content: tensor supplying the low-frequency coefficients
    :param LPF_3d: multiplicative low-pass mask; presumably the same shape
        as (or broadcastable to) the DCT coefficients — confirm with caller
    :param normalized: accepted for interface compatibility; never read
    :return: the inverse-DCT of the mixed coefficients
    """
    # The high-pass mask is the complement of the supplied low-pass mask.
    high_pass = 1 - LPF_3d

    noise_high = torch_dct.dct_3d(noise, 'ortho') * high_pass
    content_low = torch_dct.dct_3d(base_content, 'ortho') * LPF_3d

    # Low frequencies from the base content, high frequencies from noise.
    blended = content_low + noise_high
    return torch_dct.idct_3d(blended, 'ortho')