Gosula committed on
Commit 79415d5 · 1 Parent(s): c2d600d

Create device.py

Files changed (1)
  1. device.py +43 -0
device.py ADDED
@@ -0,0 +1,43 @@
+ from base64 import b64encode
+ import numpy
+ import torch
+ from diffusers import AutoencoderKL, LMSDiscreteScheduler, UNet2DConditionModel
+ from huggingface_hub import notebook_login
+
+ # For video display:
+ from IPython.display import HTML
+ from matplotlib import pyplot as plt
+ from pathlib import Path
+ from PIL import Image
+ from torch import autocast
+ from torchvision import transforms as tfms
+ from tqdm.auto import tqdm
+ from transformers import CLIPTextModel, CLIPTokenizer, logging
+ import os
+ torch_device = "cuda" if torch.cuda.is_available() else "mps" if torch.backends.mps.is_available() else "cpu"
+
+
+
+ # Load the autoencoder model, which will be used to decode the latents into image space.
+ vae = AutoencoderKL.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="vae")
+
+ # Load the tokenizer and text encoder to tokenize and encode the text.
+ tokenizer = CLIPTokenizer.from_pretrained("openai/clip-vit-large-patch14")
+ text_encoder = CLIPTextModel.from_pretrained("openai/clip-vit-large-patch14")
+
+ # The UNet model for generating the latents.
+ unet = UNet2DConditionModel.from_pretrained("CompVis/stable-diffusion-v1-4", subfolder="unet")
+
+ # The noise scheduler
+ scheduler = LMSDiscreteScheduler(beta_start=0.00085, beta_end=0.012, beta_schedule="scaled_linear", num_train_timesteps=1000)
+
+ # To the GPU we go!
+ vae = vae.to(torch_device)
+ text_encoder = text_encoder.to(torch_device)
+ unet = unet.to(torch_device)
+
+
+ token_emb_layer = text_encoder.text_model.embeddings.token_embedding    # maps token ids -> 768-d vectors
+ pos_emb_layer = text_encoder.text_model.embeddings.position_embedding   # maps positions 0..76 -> 768-d vectors
+ position_ids = text_encoder.text_model.embeddings.position_ids[:, :77]  # [[0, 1, ..., 76]]
+ position_embeddings = pos_emb_layer(position_ids)                       # shape: (1, 77, 768)
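
The last four lines of the file pull CLIP's two embedding tables out of the text encoder so that input embeddings can be assembled by hand (useful for token-level manipulation such as prompt blending). A minimal sketch of how they combine, assuming the objects loaded above; the prompt string is illustrative, not from the original file:

prompt = "A picture of a puppy"  # illustrative prompt
text_input = tokenizer(prompt, padding="max_length", max_length=77, truncation=True, return_tensors="pt")
input_ids = text_input.input_ids.to(torch_device)

token_embeddings = token_emb_layer(input_ids)              # (1, 77, 768): one vector per token
input_embeddings = token_embeddings + position_embeddings  # broadcast add of the (1, 77, 768) position table
print(input_embeddings.shape)  # torch.Size([1, 77, 768])

This sum is what CLIP's embedding module computes internally, so the result can be fed through the rest of the text model in place of the usual token ids.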
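
Since the file stops after loading the components, here is a hedged sketch of the standard classifier-free-guidance sampling loop they normally plug into; the prompt, resolution, step count, guidance scale, and seed below are illustrative choices, not from the original file:

prompt = ["A watercolor painting of an otter"]  # illustrative
height, width = 512, 512                        # SD v1 native resolution
num_inference_steps = 30                        # illustrative
guidance_scale = 7.5                            # illustrative
generator = torch.manual_seed(32)               # illustrative seed

# Encode the prompt, plus an empty prompt for classifier-free guidance
text_input = tokenizer(prompt, padding="max_length", max_length=tokenizer.model_max_length, truncation=True, return_tensors="pt")
uncond_input = tokenizer([""], padding="max_length", max_length=tokenizer.model_max_length, return_tensors="pt")
with torch.no_grad():
    text_embeddings = text_encoder(text_input.input_ids.to(torch_device))[0]
    uncond_embeddings = text_encoder(uncond_input.input_ids.to(torch_device))[0]
text_embeddings = torch.cat([uncond_embeddings, text_embeddings])

# Start from pure noise, scaled to the scheduler's expected initial sigma
scheduler.set_timesteps(num_inference_steps)
latents = torch.randn((1, unet.config.in_channels, height // 8, width // 8), generator=generator)
latents = latents.to(torch_device) * scheduler.init_noise_sigma

for t in tqdm(scheduler.timesteps):
    # Duplicate the latents so one UNet pass covers both the conditional and unconditional branch
    latent_model_input = scheduler.scale_model_input(torch.cat([latents] * 2), t)
    with torch.no_grad():
        noise_pred = unet(latent_model_input, t, encoder_hidden_states=text_embeddings).sample
    noise_pred_uncond, noise_pred_text = noise_pred.chunk(2)
    noise_pred = noise_pred_uncond + guidance_scale * (noise_pred_text - noise_pred_uncond)
    latents = scheduler.step(noise_pred, t, latents).prev_sample

# Decode latents back to pixel space (0.18215 is the SD v1 latent scaling factor)
with torch.no_grad():
    image = vae.decode(latents / 0.18215).sample
image = (image / 2 + 0.5).clamp(0, 1)

From here, image[0] can be turned into a PIL image with tfms.ToPILImage()(image[0].cpu()), using the torchvision import the file already pulls in.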