experimental ray
- .gitignore +2 -0
- experimental/clip_app.py +77 -0
- experimental/clip_app_client.py +57 -0
- experimental/fast_inference.py +85 -0
- local_test.py → experimental/local_test.py +0 -0
.gitignore ADDED
@@ -0,0 +1,2 @@

*.pyc
experimental/clip_app.py ADDED
@@ -0,0 +1,77 @@
# File name: model.py
import json
import os
import numpy as np
import torch
from starlette.requests import Request
from PIL import Image
import ray
from ray import serve
from clip_retrieval.load_clip import load_clip, get_tokenizer
# from clip_retrieval.clip_client import ClipClient, Modality

# @serve.deployment(num_replicas=2, ray_actor_options={"num_cpus": 0.2, "num_gpus": 0.2})
# @serve.deployment(num_replicas=1, ray_actor_options={"num_cpus": 0.2, "num_gpus": 0.0})
@serve.deployment(num_replicas=10, ray_actor_options={"num_cpus": 0.2, "num_gpus": 0.0})
class CLIPTransform:
    def __init__(self):
        # os.environ["OMP_NUM_THREADS"] = "20"
        # torch.set_num_threads(20)
        # Load model
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._clip_model = "ViT-L/14"
        self._clip_model_id = "laion5B-L-14"
        self.model, self.preprocess = load_clip(self._clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(self._clip_model)

        print("using device", self.device)

    def text_to_embeddings(self, prompt):
        text = self.tokenizer([prompt]).to(self.device)
        with torch.no_grad():
            prompt_embeddings = self.model.encode_text(text)
            prompt_embeddings /= prompt_embeddings.norm(dim=-1, keepdim=True)
        return prompt_embeddings

    def image_to_embeddings(self, input_im):
        input_im = Image.fromarray(input_im)
        prepro = self.preprocess(input_im).unsqueeze(0).to(self.device)
        with torch.no_grad():
            image_embeddings = self.model.encode_image(prepro)
            image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
        return image_embeddings

    def preprocessed_image_to_embeddings(self, prepro):
        with torch.no_grad():
            image_embeddings = self.model.encode_image(prepro)
            image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
        return image_embeddings

    async def __call__(self, http_request: Request) -> str:
        request = await http_request.json()
        # print(type(request))
        # print(str(request))
        # dispatch based on whether the request carries text, an image URL, or a preprocessed image
        embeddings = None
        if "text" in request:
            prompt = request["text"]
            embeddings = self.text_to_embeddings(prompt)
        elif "image" in request:
            image_url = request["image_url"]
            # download the image from the URL before decoding it
            import requests
            from io import BytesIO
            response = requests.get(image_url)
            input_image = Image.open(BytesIO(response.content))
            input_image = input_image.convert('RGB')
            input_image = np.array(input_image)
            embeddings = self.image_to_embeddings(input_image)
        elif "preprocessed_image" in request:
            prepro = request["preprocessed_image"]
            # create torch tensor on the device
            prepro = torch.tensor(prepro).to(self.device)
            embeddings = self.preprocessed_image_to_embeddings(prepro)
        else:
            raise Exception("Invalid request")
        return embeddings.cpu().numpy().tolist()


deployment_graph = CLIPTransform.bind()
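Nothing in the commit shows how the graph is started; the following is a minimal launch sketch (not part of the commit), assuming a Ray Serve 2.x-style API where serve.run accepts a bound deployment and that clip_app.py is importable from the working directory:

# Hypothetical launcher for the deployment graph defined in clip_app.py.
# Assumes Ray Serve 2.x (serve.run) and that this script sits next to clip_app.py.
import ray
from ray import serve

from clip_app import deployment_graph

ray.init()
serve.run(deployment_graph)  # serves the graph on http://127.0.0.1:8000/ by default

The same thing can typically be done from the command line with the Serve CLI (for example "serve run clip_app:deployment_graph"); either way, the client script below assumes something is already listening on port 8000.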
experimental/clip_app_client.py ADDED
@@ -0,0 +1,57 @@
# File name: graph_client.py
import json
import time
from concurrent.futures import ThreadPoolExecutor, as_completed

import requests

# english_text = (
#     "It was the best of times, it was the worst of times, it was the age "
#     "of wisdom, it was the age of foolishness, it was the epoch of belief"
# )
# response = requests.post("http://127.0.0.1:8000/", json=english_text)
# french_text = response.text

# print(french_text)

test_image_url = "https://static.wixstatic.com/media/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg/v1/fill/w_454,h_333,fp_0.50_0.50,q_90/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg"
english_text = (
    "It was the best of times, it was the worst of times, it was the age "
    "of wisdom, it was the age of foolishness, it was the epoch of belief"
)


def send_text_request(number):
    payload = {"text": english_text}
    response = requests.post("http://127.0.0.1:8000/", json=payload)
    embeddings = response.text
    return number, embeddings


def process_text(numbers, max_workers=10):
    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        futures = [executor.submit(send_text_request, number) for number in numbers]
        for future in as_completed(futures):
            n_result, result = future.result()
            result = json.loads(result)
            print(f"{n_result} : {len(result[0])}")

# def process_text(numbers, max_workers=10):
#     for n in numbers:
#         n_result, result = send_text_request(n)
#         result = json.loads(result)
#         print(f"{n_result} : {len(result[0])}")


if __name__ == "__main__":
    # n_calls = 100000
    n_calls = 1000
    numbers = list(range(n_calls))
    start_time = time.monotonic()
    process_text(numbers)
    end_time = time.monotonic()
    total_time = end_time - start_time
    avg_time_ms = total_time / n_calls * 1000
    calls_per_sec = n_calls / total_time
    print(f"Average time taken: {avg_time_ms:.2f} ms")
    print(f"Number of calls per second: {calls_per_sec:.2f}")
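The client above only exercises the server's "text" branch. Below is a sketch (not part of the commit) of a single image request; the payload shape is inferred from the "image" branch in clip_app.py, which dispatches on the presence of an "image" key and reads the URL from "image_url":

# Hypothetical image request against the same endpoint; payload layout is an assumption
# based on the handler in clip_app.py ("image" selects the branch, "image_url" is fetched).
import requests

payload = {"image": True, "image_url": test_image_url}
response = requests.post("http://127.0.0.1:8000/", json=payload)
embedding = response.json()
print(len(embedding[0]))  # embedding width; 768 for ViT-L/14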
experimental/fast_inference.py ADDED
@@ -0,0 +1,85 @@
import time
import numpy as np
import torch
from PIL import Image
import ray
from ray import serve
from clip_retrieval.load_clip import load_clip, get_tokenizer
# from clip_retrieval.clip_client import ClipClient, Modality


class CLIPModel:
    def __init__(self):
        self.device = "cuda:0" if torch.cuda.is_available() else "cpu"
        self._test_image_url = "https://static.wixstatic.com/media/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg/v1/fill/w_454,h_333,fp_0.50_0.50,q_90/4d6b49_42b9435ce1104008b1b5f7a3c9bfcd69~mv2.jpg"
        self._clip_model = "ViT-L/14"
        self._clip_model_id = "laion5B-L-14"

        self.model, self.preprocess = load_clip(self._clip_model, use_jit=True, device=self.device)
        self.tokenizer = get_tokenizer(self._clip_model)

        print("using device", self.device)

    def text_to_embeddings(self, prompt):
        text = self.tokenizer([prompt]).to(self.device)
        with torch.no_grad():
            prompt_embeddings = self.model.encode_text(text)
            prompt_embeddings /= prompt_embeddings.norm(dim=-1, keepdim=True)
        return prompt_embeddings

    def image_to_embeddings(self, input_im):
        input_im = Image.fromarray(input_im)
        prepro = self.preprocess(input_im).unsqueeze(0).to(self.device)
        with torch.no_grad():
            image_embeddings = self.model.encode_image(prepro)
            image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
        return image_embeddings

    def preprocessed_image_to_embeddings(self, prepro):
        with torch.no_grad():
            image_embeddings = self.model.encode_image(prepro)
            image_embeddings /= image_embeddings.norm(dim=-1, keepdim=True)
        return image_embeddings

    # simple regression test: text, raw image, and preprocessed image paths
    def regression_test(self):
        text_embeddings = self.text_to_embeddings("Howdy!")
        print("text embeddings", text_embeddings)

        # download the test image from its URL
        import requests
        from io import BytesIO
        response = requests.get(self._test_image_url)
        input_image = Image.open(BytesIO(response.content))
        input_image = input_image.convert('RGB')
        # convert image to numpy array
        input_image = np.array(input_image)
        image_embeddings = self.image_to_embeddings(input_image)
        print("image embeddings", image_embeddings)

        input_im = Image.fromarray(input_image)
        prepro = self.preprocess(input_im).unsqueeze(0).to(self.device)
        image_embeddings = self.preprocessed_image_to_embeddings(prepro)
        print("image embeddings", image_embeddings)


# regression test
test_instance = CLIPModel()
test_instance.regression_test()

ray.init()
serve.start()
# Register the model with Ray Serve
# (create_backend/create_endpoint is the older Ray Serve API; newer releases use
# @serve.deployment and serve.run instead, as in clip_app.py)
serve.create_backend("clip_model", CLIPModel)
serve.create_endpoint("clip_model", backend="clip_model", route="/clip_model")


# You can now call the endpoint with your input
import requests

input_prompt = "Howdy!"
response = requests.get("http://localhost:8000/clip_model", json={"prompt": input_prompt})
print(response.json())
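Because serve.create_backend / serve.create_endpoint are no longer available in recent Ray releases, a rough sketch of an equivalent registration in the decorator style that clip_app.py already uses is shown below; the wrapper class, route, and payload key here are illustrative assumptions, not part of the commit:

# Hypothetical Ray Serve 2.x-style equivalent of the legacy registration above.
# Wraps the existing CLIPModel and only exposes the text path; names are illustrative.
from starlette.requests import Request
from ray import serve

@serve.deployment(num_replicas=1)
class CLIPModelDeployment:
    def __init__(self):
        self.model = CLIPModel()

    async def __call__(self, http_request: Request):
        request = await http_request.json()
        return self.model.text_to_embeddings(request["prompt"]).cpu().numpy().tolist()

serve.run(CLIPModelDeployment.bind(), route_prefix="/clip_model")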
local_test.py → experimental/local_test.py RENAMED
File without changes