eliphatfs committed · 50b3aa3 · 1 parent: 4dbdaec

Update support library.

Changed files:
- openshape/demo/caption.py (+1, -0)
- openshape/demo/classification.py (+9, -0)
- openshape/demo/misc_utils.py (+1, -1)
- openshape/demo/sd_pc2img.py (+12, -4)
openshape/demo/caption.py CHANGED

@@ -148,6 +148,7 @@ def pc_caption(pc_encoder: torch.nn.Module, pc, cond_scale):
     ref_dev = next(pc_encoder.parameters()).device
     prefix = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
     prefix = prefix.float() * cond_scale
+    prefix = prefix.to(next(model.parameters()).device)
     prefix_embed = model.clip_project(prefix).reshape(1, prefix_length, -1)
     text = generate2(model, tokenizer, embed=prefix_embed)
     return text
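The added line moves the encoder output onto the caption model's device before model.clip_project consumes it, so pc_caption no longer assumes that pc_encoder and model share a device (e.g. encoder on CPU, captioner on GPU). A minimal sketch of the same pattern, with encoder and head as hypothetical stand-ins for pc_encoder and model.clip_project:

import torch

# Device-alignment sketch: two modules that may live on different devices.
encoder = torch.nn.Linear(3, 8)   # hypothetical stand-in for pc_encoder
head = torch.nn.Linear(8, 4)      # hypothetical stand-in for model.clip_project
if torch.cuda.is_available():
    head = head.cuda()

x = torch.randn(5, 3)
feat = encoder(x.to(next(encoder.parameters()).device))
feat = feat.to(next(head.parameters()).device)  # the pattern the diff adds
out = head(feat)
print(out.shape)  # torch.Size([5, 4])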
openshape/demo/classification.py CHANGED

@@ -11,3 +11,12 @@ def pred_lvis_sims(pc_encoder: torch.nn.Module, pc):
     sim = torch.matmul(F.normalize(lvis.feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
     argsort = torch.argsort(sim, descending=True)
     return OrderedDict((lvis.categories[i], sim[i]) for i in argsort if i < len(lvis.categories))
+
+
+@torch.no_grad()
+def pred_custom_sims(pc_encoder: torch.nn.Module, pc, cats, feats):
+    ref_dev = next(pc_encoder.parameters()).device
+    enc = pc_encoder(torch.tensor(pc[:, [0, 2, 1, 3, 4, 5]].T[None], device=ref_dev)).cpu()
+    sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
+    argsort = torch.argsort(sim, descending=True)
+    return OrderedDict((cats[i], sim[i]) for i in argsort if i < len(cats))
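The new pred_custom_sims generalizes pred_lvis_sims from the fixed LVIS vocabulary to caller-supplied categories: it encodes the point cloud, then ranks cats by cosine similarity between the encoding and the precomputed per-category features. The pc[:, [0, 2, 1, 3, 4, 5]] permutation swaps the Y and Z columns of an Nx6 XYZRGB array before encoding. A usage sketch with dummy tensors in place of a real encoder; the feature width D is an assumption, not a value the diff specifies:

import torch
import torch.nn.functional as F
from collections import OrderedDict

D = 1280                             # assumed feature width, for illustration only
cats = ["chair", "table", "lamp"]
feats = torch.randn(len(cats), D)    # e.g. text features, one row per category
enc = torch.randn(1, D)              # stand-in for the pc_encoder output

# Same ranking logic as pred_custom_sims: normalized dot products, sorted descending.
sim = torch.matmul(F.normalize(feats, dim=-1), F.normalize(enc, dim=-1).squeeze())
argsort = torch.argsort(sim, descending=True)
ranking = OrderedDict((cats[i], sim[i]) for i in argsort if i < len(cats))
print(list(ranking))                 # categories from most to least similar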
openshape/demo/misc_utils.py CHANGED

@@ -109,7 +109,7 @@ def input_3d_shape(key=None):
     objaid = st.text_input("Enter an Objaverse ID", key=objaid_key)
     model = st.file_uploader("Or upload a model (.glb/.obj/.ply)", key=model_key)
     npy = st.file_uploader("Or upload a point cloud numpy array (.npy of Nx3 XYZ or Nx6 XYZRGB)", key=npy_key)
-    swap_yz_axes = st.
+    swap_yz_axes = st.radio("Gravity", ["Y is up (for most Objaverse shapes)", "Z is up"], key=swap_key) == "Z is up"
     f32 = numpy.float32
 
     def load_data(prog):
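The completed line works because st.radio returns the selected option string, so comparing it against "Z is up" turns the choice into the boolean swap_yz_axes flag. A sketch of how such a flag is typically consumed; the real load_data body is not part of this diff, so maybe_swap_yz is a hypothetical helper:

import numpy

def maybe_swap_yz(pc: numpy.ndarray, swap_yz_axes: bool) -> numpy.ndarray:
    # Swap the Y and Z columns of an Nx3 or Nx6 array, leaving colors untouched.
    if swap_yz_axes:
        pc = pc[:, [0, 2, 1] + list(range(3, pc.shape[1]))]
    return pc

pc = numpy.random.rand(100, 6).astype(numpy.float32)  # Nx6 XYZRGB
print(maybe_swap_yz(pc, True).shape)  # (100, 6)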
openshape/demo/sd_pc2img.py CHANGED

@@ -13,25 +13,33 @@ class Wrapper(transformers.modeling_utils.PreTrainedModel):
         return rst.ObjectProxy(image_embeds=x)
 
 
-half = torch.float16 if torch.cuda.is_available() else torch.bfloat16
 pipe = StableUnCLIPImg2ImgPipeline.from_pretrained(
     "diffusers/stable-diffusion-2-1-unclip-i2i-l",
-    torch_dtype=half, variant="fp16",
+    # variant="fp16",
     image_encoder = Wrapper()
 )
+# pe = pipe.text_encoder.text_model.embeddings
+# pe.position_ids = torch.arange(pe.position_ids.shape[-1]).expand((1, -1)).to(pe.position_ids)  # workaround
 if torch.cuda.is_available():
     pipe = pipe.to('cuda:' + str(torch.cuda.current_device()))
     pipe.enable_model_cpu_offload(torch.cuda.current_device())
+    pipe.enable_attention_slicing()
+    pipe.enable_vae_slicing()
 
 
 @torch.no_grad()
 def pc_to_image(pc_encoder: torch.nn.Module, pc, prompt, noise_level, width, height, cfg_scale, num_steps, callback):
     ref_dev = next(pc_encoder.parameters()).device
     enc = pc_encoder(torch.tensor(pc.T[None], device=ref_dev))
+    enc = torch.nn.functional.normalize(enc, dim=-1) * (768 ** 0.5) / 2
+    if torch.cuda.is_available():
+        enc = enc.to('cuda:' + str(torch.cuda.current_device()))
+    # enc = enc.type(half)
+    # with torch.autocast("cuda"):
     return pipe(
-        prompt=', '.join(["best quality", prompt]),
+        prompt=', '.join(["best quality"] + ([prompt] if prompt else [])),
         negative_prompt="cropped, worst quality, low quality, normal quality, jpeg artifacts, signature, watermark, username, blurry",
-        image=enc.type(half),
+        image=enc,
         width=width, height=height,
         guidance_scale=cfg_scale,
         noise_level=noise_level,
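Taken together, this hunk loads the pipeline in default precision (the fp16 variant hint is left commented out), adds the diffusers memory savers enable_attention_slicing() and enable_vae_slicing() on CUDA, rescales the shape embedding to a fixed L2 norm before the Wrapper passes it through as the unCLIP image embedding, and makes the prompt assembly tolerate an empty user prompt. A small sketch of the two arithmetic pieces; the 768 comes from the constant in the diff and presumably matches the pipeline's CLIP embedding width:

import torch

# Rescaling: unit-normalize, then fix the L2 norm at sqrt(768) / 2 ≈ 13.86.
enc = torch.randn(1, 768)
enc = torch.nn.functional.normalize(enc, dim=-1) * (768 ** 0.5) / 2
print(enc.norm(dim=-1))  # tensor([13.8564])

# Prompt assembly: an empty prompt contributes nothing, so the joined string
# never ends with a dangling ", ".
for prompt in ["a wooden chair", ""]:
    print(', '.join(["best quality"] + ([prompt] if prompt else [])))
# best quality, a wooden chair
# best quality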