Spaces:

deepghs
/

danbooru_character_search

Running

App Files Community

narugo committed on May 8

Commit

dfcc607

•

1 Parent(s): f474bbc

dev(narugo): better metrics

Browse files

Files changed (2) hide show

app.py +5 -4
index.py +22 -6

app.py CHANGED Viewed

@@ -6,8 +6,8 @@ from PIL import Image
 from index import query_character
-def _fn(image: Image.Image, count: int = 5):
-    return query_character(image, count)
 if __name__ == '__main__':
@@ -15,7 +15,8 @@ if __name__ == '__main__':
         with gr.Row():
             with gr.Column():
                 gr_input_image = gr.Image(type='pil', label='Original Image')
-                gr_max_count = gr.Slider(minimum=1, maximum=20, step=1, value=5, label='Max Query Count')
                 gr_submit = gr.Button(value='Submit', variant='primary')
             with gr.Column():
@@ -28,7 +29,7 @@ if __name__ == '__main__':
             gr_submit.click(
                 _fn,
-                inputs=[gr_input_image, gr_max_count],
                 outputs=[gr_gallery, gr_table],
             )

 from index import query_character
+def _fn(image: Image.Image, count: int = 10, threshold: float = 0.8):
+    return query_character(image, count, order_by='same_ratio', threshold=threshold)
 if __name__ == '__main__':
         with gr.Row():
             with gr.Column():
                 gr_input_image = gr.Image(type='pil', label='Original Image')
+                gr_max_count = gr.Slider(minimum=1, maximum=30, step=1, value=10, label='Max Query Count')
+                gr_threshold = gr.Slider(minimum=0.0, maximum=0.99, step=0.01, value=0.8, label='Threshold')
                 gr_submit = gr.Button(value='Submit', variant='primary')
             with gr.Column():
             gr_submit.click(
                 _fn,
+                inputs=[gr_input_image, gr_max_count, gr_threshold],
                 outputs=[gr_gallery, gr_table],
             )

index.py CHANGED Viewed

@@ -8,7 +8,7 @@ from autofaiss import build_index
 from hfutils.operate import get_hf_fs
 from huggingface_hub import hf_hub_download
 from imgutils.data import load_image
-from imgutils.metrics import ccip_batch_extract_features
 SRC_REPO = 'deepghs/character_index'
@@ -36,7 +36,7 @@ def gender_predict(p):
         return 'not_sure'
-def query_character(image: Image.Image, count: int = 5):
     (index, index_infos), tag_infos = _make_index()
     query = ccip_batch_extract_features([image])
     assert query.shape == (1, 768)
@@ -44,7 +44,7 @@ def query_character(image: Image.Image, count: int = 5):
     all_dists, all_indices = index.search(query, k=count)
     dists, indices = all_dists[0], all_indices[0]
-    images, records = [], []
     for dist, idx in zip(dists, indices):
         info = tag_infos[idx]
         current_image = load_image(hf_hub_download(
@@ -52,14 +52,30 @@ def query_character(image: Image.Image, count: int = 5):
             repo_type='dataset',
             filename=f'{info["hprefix"]}/{info["short_tag"]}/1.webp'
         ))
-        images.append((current_image, f'{info["tag"]} ({dist:.3f})'))
         records.append({
             'id': info['id'],
             'tag': info['tag'],
             'gender': gender_predict(info['gender']),
             'copyright': info['copyright'],
-            'score': dist,
         })
     df_records = pd.DataFrame(records)
-    return images, df_records

 from hfutils.operate import get_hf_fs
 from huggingface_hub import hf_hub_download
 from imgutils.data import load_image
+from imgutils.metrics import ccip_batch_extract_features, ccip_batch_differences, ccip_default_threshold
 SRC_REPO = 'deepghs/character_index'
         return 'not_sure'
+def query_character(image: Image.Image, count: int = 5, order_by: str = 'same_ratio', threshold: float = 0.7):
     (index, index_infos), tag_infos = _make_index()
     query = ccip_batch_extract_features([image])
     assert query.shape == (1, 768)
     all_dists, all_indices = index.search(query, k=count)
     dists, indices = all_dists[0], all_indices[0]
+    images, records = {}, []
     for dist, idx in zip(dists, indices):
         info = tag_infos[idx]
         current_image = load_image(hf_hub_download(
             repo_type='dataset',
             filename=f'{info["hprefix"]}/{info["short_tag"]}/1.webp'
         ))
+        feats = np.load(hf_hub_download(
+            repo_id=SRC_REPO,
+            repo_type='dataset',
+            filename=f'{info["hprefix"]}/{info["short_tag"]}/feat.npy'
+        ))
+        diffs = ccip_batch_differences([query[0], *feats])[0, 1:]
+        images[info['tag']] = current_image
         records.append({
             'id': info['id'],
             'tag': info['tag'],
             'gender': gender_predict(info['gender']),
             'copyright': info['copyright'],
+            'index_score': dist,
+            'mean_diff': diffs.mean(),
+            'same_ratio': (diffs < ccip_default_threshold()).mean(),
         })
     df_records = pd.DataFrame(records)
+    df_records = df_records.sort_values(
+        by=[order_by, 'index_score'] if order_by != 'index_score' else ['index_score'],
+        ascending=[False, False] if order_by != 'index_score' else [False],
+    )
+    df_records = df_records[df_records[order_by] >= threshold]
+    ret_images = []
+    for row_item in df_records.to_dict('records'):
+        ret_images.append((images[row_item['tag']], f'{row_item["tag"]} ({row_item[order_by]:.3f})'))
+    return ret_images, df_records