Spaces: Maken CLIP Sketch

Files changed:
- app.py +114 -0
- class_names.txt +100 -0
- clip.csv +3 -0
- clip.npy +3 -0
- pytorch_model.bin +3 -0
- requirements.txt +6 -0
app.py
ADDED
@@ -0,0 +1,114 @@
import os

from pathlib import Path
import pandas as pd, numpy as np
from transformers import CLIPProcessor, CLIPTextModel, CLIPModel
import torch
from torch import nn
import gradio as gr
import requests
from PIL import Image, ImageFile
ImageFile.LOAD_TRUNCATED_IMAGES = True


LABELS = Path('class_names.txt').read_text().splitlines()
class_model = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding='same'),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3, padding='same'),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, padding='same'),
    nn.ReLU(),
    nn.MaxPool2d(2),
    nn.Flatten(),
    nn.Linear(1152, 256),
    nn.ReLU(),
    nn.Linear(256, len(LABELS)),
)
state_dict = torch.load('pytorch_model.bin', map_location='cpu')
class_model.load_state_dict(state_dict, strict=False)
class_model.eval()


model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
df = pd.read_csv('clip.csv')
embeddings_npy = np.load('clip.npy')
embeddings = np.divide(embeddings_npy, np.sqrt(np.sum(embeddings_npy**2, axis=1, keepdims=True)))


def compute_text_embeddings(list_of_strings):
    inputs = processor(text=list_of_strings, return_tensors="pt", padding=True)
    return model.get_text_features(**inputs)


def compute_image_embeddings(list_of_images):
    inputs = processor(images=list_of_images, return_tensors="pt", padding=True)
    return model.get_image_features(**inputs)


def load_image(image, same_height=False):
    # im = Image.open(path)
    im = Image.fromarray(np.uint8(image))
    if im.mode != 'RGB':
        im = im.convert('RGB')
    if same_height:
        ratio = 224/im.size[1]
        return im.resize((int(im.size[0]*ratio), int(im.size[1]*ratio)))
    else:
        ratio = 224/min(im.size)
        return im.resize((int(im.size[0]*ratio), int(im.size[1]*ratio)))


def download_img(identifier, url):
    local_path = f"{identifier}.jpg"
    if not os.path.isfile(local_path):
        img_data = requests.get(url).content
        with open(local_path, 'wb') as handler:
            handler.write(img_data)
    return local_path


def predict(image=None, text=None, sketch=None):
    if image is not None:
        input_embeddings = compute_image_embeddings([load_image(image)]).detach().numpy()
        topk = {"local": 100}
    else:
        if text:
            query = text
            topk = {text: 100}
        else:
            x = torch.tensor(sketch, dtype=torch.float32).unsqueeze(0).unsqueeze(0) / 255.
            with torch.no_grad():
                out = class_model(x)
            probabilities = torch.nn.functional.softmax(out[0], dim=0)
            values, indices = torch.topk(probabilities, 5)
            query = LABELS[indices[0]]
            topk = {LABELS[i]: v.item() for i, v in zip(indices, values)}
        input_embeddings = compute_text_embeddings([query]).detach().numpy()

    n_results = 3
    results = np.argsort((embeddings @ input_embeddings.T)[:, 0])[-1:-n_results - 1:-1]
    outputs = [download_img(df.iloc[i]['id'], df.iloc[i]['thumbnail']) for i in results]
    outputs.insert(0, topk)
    print(outputs)
    return outputs


def predict_sketch(sketch):
    return predict(None, None, sketch)


title = "Type or draw to search in the Nasjonalbiblioteket"
description = "Find images in the Nasjonalbiblioteket image collections based on what you draw or type"
interface = gr.Interface(
    fn=[predict_sketch],
    inputs=["sketch"],
    outputs=[gr.outputs.Label(num_top_classes=3)] + 3 * [gr.outputs.Image(type="file")],
    title=title,
    description=description,
    live=True
)
interface.launch(debug=True)
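Note on the sketch classifier's input size: nn.Linear(1152, 256) implies a flattened feature map of 128 x 3 x 3, which is what a 28x28 grayscale sketch produces after the three 2x2 max-pools (28 -> 14 -> 7 -> 3). Gradio's "sketch" input presumably delivers such a 28x28 array here. A minimal shape check, not part of the commit, reusing the same layer definitions:

import torch
from torch import nn

# Same convolutional trunk as class_model in app.py, up to the Flatten.
trunk = nn.Sequential(
    nn.Conv2d(1, 32, 3, padding='same'), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(32, 64, 3, padding='same'), nn.ReLU(), nn.MaxPool2d(2),
    nn.Conv2d(64, 128, 3, padding='same'), nn.ReLU(), nn.MaxPool2d(2),
    nn.Flatten(),
)
# Assumed input: one 28x28 grayscale sketch (batch, channel, height, width).
print(trunk(torch.zeros(1, 1, 28, 28)).shape)  # -> torch.Size([1, 1152])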
class_names.txt
ADDED
@@ -0,0 +1,100 @@
airplane
alarm_clock
anvil
apple
axe
baseball
baseball_bat
basketball
beard
bed
bench
bicycle
bird
book
bread
bridge
broom
butterfly
camera
candle
car
cat
ceiling_fan
cell_phone
chair
circle
clock
cloud
coffee_cup
cookie
cup
diving_board
donut
door
drums
dumbbell
envelope
eye
eyeglasses
face
fan
flower
frying_pan
grapes
hammer
hat
headphones
helmet
hot_dog
ice_cream
key
knife
ladder
laptop
light_bulb
lightning
line
lollipop
microphone
moon
mountain
moustache
mushroom
pants
paper_clip
pencil
pillow
pizza
power_outlet
radio
rainbow
rifle
saw
scissors
screwdriver
shorts
shovel
smiley_face
snake
sock
spider
spoon
square
star
stop_sign
suitcase
sun
sword
syringe
t-shirt
table
tennis_racquet
tent
tooth
traffic_light
tree
triangle
umbrella
wheel
wristwatch
clip.csv
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:b4ecd243d042c4b0c0f93de5df51a444b9e3076c17dee21313403e066ec15750
size 10444275
clip.npy
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:8f1cee3eb68b43e8af5c5aab5955c473d76104e2568815d9c9285f4ba079d6ac
size 112642176
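clip.csv and clip.npy are stored with Git LFS, so only their pointer files appear in this diff. From how app.py reads them, clip.csv must provide at least 'id' and 'thumbnail' columns, and clip.npy holds one CLIP image embedding per CSV row; its 112,642,176-byte size is consistent with roughly 55,000 rows of 512-dimensional float32 ViT-B/32 embeddings. A sketch of how such a pair could be precomputed offline; the metadata records and URL below are hypothetical placeholders, not taken from this repository:

import numpy as np
import pandas as pd
import requests
import torch
from io import BytesIO
from PIL import Image
from transformers import CLIPModel, CLIPProcessor

model = CLIPModel.from_pretrained("openai/clip-vit-base-patch32")
processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")

records = [  # hypothetical collection metadata; real ids and URLs would come from the library's catalogue
    {"id": "item-001", "thumbnail": "https://example.org/thumbnails/item-001.jpg"},
]

vectors = []
for record in records:
    image = Image.open(BytesIO(requests.get(record["thumbnail"]).content)).convert("RGB")
    inputs = processor(images=[image], return_tensors="pt")
    with torch.no_grad():
        vectors.append(model.get_image_features(**inputs)[0].numpy())

pd.DataFrame(records).to_csv("clip.csv", index=False)  # columns: id, thumbnail
np.save("clip.npy", np.stack(vectors))                 # app.py L2-normalizes these rows at load time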
pytorch_model.bin
ADDED
@@ -0,0 +1,3 @@
version https://git-lfs.github.com/spec/v1
oid sha256:effb6ea6f1593c09e8247944028ed9c309b5ff1cef82ba38b822bee2ca4d0f3c
size 1656903
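pytorch_model.bin is likewise an LFS pointer. Its 1,656,903-byte size is consistent with the sketch classifier defined in app.py: assuming float32 weights and 100 labels, that network has about 413,540 parameters (roughly 1.65 MB) plus a little serialization overhead. app.py loads it on CPU with torch.load(..., map_location='cpu') and load_state_dict(..., strict=False).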
requirements.txt
ADDED
@@ -0,0 +1,6 @@
torch
transformers
numpy
pandas
ftfy
pillow
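requirements.txt pins no versions and does not list gradio or requests, both of which app.py imports. On Hugging Face Spaces the Gradio SDK supplies gradio automatically and requests is typically pulled in transitively by transformers, but running the app outside Spaces would likely also require installing them (for example, pip install -r requirements.txt gradio requests, then python app.py).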