search_image_by_image

Running

narugo committed on Nov 15, 2024

Commit

681a350

1 Parent(s): 3c7d8d9

dev(narugo): support multiple sites

Files changed (1) hide show

app.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 from functools import lru_cache
 from typing import List, Dict
@@ -23,10 +24,14 @@ _ALL_MODEL_NAMES = [
     for path in hf_fs.glob(f'{_REPO_ID}/*/knn.index')
 ]
-def _get_from_ids(ids: List[int]) -> Dict[int, Image.Image]:
     with TemporaryDirectory() as td:
-        datapool = DanbooruWebpDataPool()
         datapool.batch_download_to_directory(
             resource_ids=ids,
             dst_dir=td,
@@ -42,13 +47,20 @@ def _get_from_ids(ids: List[int]) -> Dict[int, Image.Image]:
         return retval
-def _x(x):
-    if isinstance(x, (int, np.integer)):
-        return int(x)
-    elif isinstance(x, (str, np.str_)):
-        return int(str(x).split('_')[-1])
-    else:
-        raise ValueError(f'Invalid ID: {x!r}, type: {type(x)!r}')
 @lru_cache(maxsize=3)
@@ -85,11 +97,10 @@ def search(model_name: str, img_input, n_neighbours: int):
     dists, indexes = knn_index.search(embeddings, k=n_neighbours)
     neighbours_ids = images_ids[indexes][0]
-    neighbours_ids = [_x(x) for x in neighbours_ids]
     captions = []
     images = []
-    ids_to_images = _get_from_ids(neighbours_ids)
     for image_id, dist in zip(neighbours_ids, dists[0]):
         if image_id in ids_to_images:
             images.append(ids_to_images[image_id])

 import json
 import os
+from collections import defaultdict
 from functools import lru_cache
 from typing import List, Dict
     for path in hf_fs.glob(f'{_REPO_ID}/*/knn.index')
 ]
+_SITE_CLS = {
+    'danbooru': DanbooruWebpDataPool,
+}
+def _get_from_ids(site_name: str, ids: List[int]) -> Dict[int, Image.Image]:
     with TemporaryDirectory() as td:
+        datapool = _SITE_CLS[site_name]()
         datapool.batch_download_to_directory(
             resource_ids=ids,
             dst_dir=td,
         return retval
+def _get_from_raw_ids(ids: List[str]) -> Dict[str, Image.Image]:
+    _sites = defaultdict(list)
+    for id_ in ids:
+        site_name, num_id = id_.split('_', maxsplit=1)
+        num_id = int(num_id)
+        _sites[site_name].append(num_id)
+    _retval = {}
+    for site_name, site_ids in _sites.items():
+        _retval.update({
+            f'{site_name}_{id_}': image
+            for id_, image in _get_from_ids(site_name, site_ids)
+        })
+    return _retval
 @lru_cache(maxsize=3)
     dists, indexes = knn_index.search(embeddings, k=n_neighbours)
     neighbours_ids = images_ids[indexes][0]
     captions = []
     images = []
+    ids_to_images = _get_from_raw_ids(neighbours_ids)
     for image_id, dist in zip(neighbours_ids, dists[0]):
         if image_id in ids_to_images:
             images.append(ids_to_images[image_id])