Spaces:
Runtime error
Runtime error
Create device.py
Browse files
device.py
ADDED
@@ -0,0 +1,43 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
from base64 import b64encode
|
2 |
+
import numpy
|
3 |
+
import torch
|
4 |
+
from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
|
5 |
+
from huggingface_hub import notebook_login
|
6 |
+
|
7 |
+
# For video display:
|
8 |
+
from IPython.display import HTML
|
9 |
+
from matplotlib import pyplot as plt
|
10 |
+
from pathlib import Path
|
11 |
+
from PIL import Image
|
12 |
+
from torch import autocast
|
13 |
+
from torchvision import transforms as tfms
|
14 |
+
from tqdm.auto import tqdm
|
15 |
+
from transformers import CLIPTextModel, CLIPTokenizer, logging
|
16 |
+
import os
|
17 |
+
# Pick the compute device in priority order: CUDA first, then MPS, then CPU.
if torch.cuda.is_available():
    torch_device = "cuda"
elif torch.backends.mps.is_available():
    torch_device = "mps"
else:
    torch_device = "cpu"
|
18 |
+
|
19 |
+
|
20 |
+
|
21 |
+
# Load the autoencoder model which will be used to decode the latents into image space.
|
22 |
+
# Only the "vae" subfolder of the checkpoint repository is requested here.
vae = AutoencoderKL.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    subfolder="vae",
)
|
23 |
+
|
24 |
+
# Load the tokenizer and text encoder to tokenize and encode the text.
|
25 |
+
# Tokenizer is loaded from the same checkpoint id as the text encoder below.
tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
|
26 |
+
# Text encoder is loaded from the same checkpoint id as the tokenizer above.
text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
|
27 |
+
|
28 |
+
# The UNet model for generating the latents.
|
29 |
+
# Only the "unet" subfolder of the checkpoint repository is requested here.
unet = UNet2DConditionModel.from_pretrained(
    "CompVis/stable-diffusion-v1-4",
    subfolder="unet",
)
|
30 |
+
|
31 |
+
# The noise scheduler
|
32 |
+
# Scheduler is built from explicit hyperparameters rather than loaded from a
# checkpoint: a "scaled_linear" beta schedule over 1000 training timesteps.
scheduler = LMSDiscreteScheduler(
    beta_start=0.00085,
    beta_end=0.012,
    beta_schedule="scaled_linear",
    num_train_timesteps=1000,
)
|
33 |
+
|
34 |
+
# Move the models to the selected device (a GPU when one is available, otherwise the CPU).
|
35 |
+
# Move the VAE to torch_device and rebind the name to the value .to() returns.
vae = vae.to(torch_device)
|
36 |
+
# Move the text encoder to torch_device and rebind the name to the value .to() returns.
text_encoder = text_encoder.to(torch_device)
|
37 |
+
# Move the UNet to torch_device and rebind the name to the value .to() returns.
# (The trailing semicolon was a notebook output-suppression leftover with no
# effect in a script, so it has been dropped.)
unet = unet.to(torch_device)
|
38 |
+
|
39 |
+
|
40 |
+
# Direct handle to the text encoder's token-embedding submodule.
token_emb_layer = text_encoder.text_model.embeddings.token_embedding
|
41 |
+
# Direct handle to the text encoder's position-embedding submodule.
pos_emb_layer = text_encoder.text_model.embeddings.position_embedding
|
42 |
+
# Keep the first 77 columns of the embedding module's position_ids.
# NOTE(review): 77 presumably matches the CLIP tokenizer's maximum sequence
# length -- confirm against tokenizer.model_max_length.
position_ids = text_encoder.text_model.embeddings.position_ids[:, :77]
|
43 |
+
# Run the position-embedding layer on position_ids once, at module level,
# and store the result in position_embeddings.
position_embeddings = pos_emb_layer(position_ids)
|