Johnny-Z committed
Commit 16cf4a9 · verified · 1 Parent(s): fe53e5c

Upload 3 files

Files changed (3):
  1. aesthetic_predictor_ava.pth +3 -0
  2. app.py +73 -17
  3. cls_predictor.pth +1 -1
aesthetic_predictor_ava.pth ADDED
@@ -0,0 +1,3 @@
+version https://git-lfs.github.com/spec/v1
+oid sha256:f4363c5bfea88c84ae55a55be5ba4c11de4853a87cedb1253373e81b592e2598
+size 29545526
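The new .pth is tracked with Git LFS, so the committed file is just the three-line pointer above; `git lfs pull` fetches the real 29.5 MB weights. A minimal sketch (not part of the repo) for checking a fetched copy against the pointer's oid and size:

```python
import hashlib
import os

# Values copied from the LFS pointer above.
PATH = "aesthetic_predictor_ava.pth"
EXPECTED_OID = "f4363c5bfea88c84ae55a55be5ba4c11de4853a87cedb1253373e81b592e2598"
EXPECTED_SIZE = 29545526

# A bare pointer file is only a few hundred bytes, so a size check
# catches a clone where `git lfs pull` has not been run yet.
assert os.path.getsize(PATH) == EXPECTED_SIZE, "run `git lfs pull` first"

# Hash in 1 MiB chunks and compare against the pointer's sha256 oid.
sha = hashlib.sha256()
with open(PATH, "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        sha.update(chunk)
assert sha.hexdigest() == EXPECTED_OID, "checksum mismatch"
print("aesthetic_predictor_ava.pth matches its LFS pointer")
```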
app.py CHANGED
@@ -8,7 +8,15 @@ import gradio as gr
 
 TITLE = "Danbooru Tagger"
 DESCRIPTION = """
-Macro F1 (General & Character): 0.4937
+## Dataset
+- Source: Cleaned Danbooru
+- Last Update: December 28, 2024
+
+## Metrics
+- Validation Split: 10% of images
+- Validation Results (Macro F1 Score):
+  - General & Character: 0.4916
+  - Artist: 0.6677
 """
 
 kaomojis = [
@@ -112,6 +120,52 @@ mlp_artist.load_state_dict(artist_s)
 mlp_artist.to(device)
 mlp_artist.eval()
 
+
+class AES(nn.Module):
+    def __init__(self, input_size):
+        super().__init__()
+        self.layers0 = nn.Sequential(
+            nn.Linear(input_size, 1280),
+            nn.LayerNorm(1280),
+            nn.Mish()
+        )
+        self.layers1 = nn.Sequential(
+            nn.Sigmoid()
+        )
+        self.layers2 = nn.Sequential(
+            nn.Linear(1280, 640),
+            nn.LayerNorm(640),
+            nn.Mish(),
+            nn.Dropout(0.2),
+            nn.Linear(640, 1)
+        )
+        self.layers3 = nn.Sequential(
+            nn.Linear(1280, 640),
+            nn.LayerNorm(640),
+            nn.Mish(),
+            nn.Dropout(0.2),
+            nn.Linear(640, 1)
+        )
+        self.layers4 = nn.Sequential(
+            nn.Linear(1280, 640),
+            nn.LayerNorm(640),
+            nn.Mish(),
+            nn.Dropout(0.2),
+            nn.Linear(640, 1)
+        )
+
+    def forward(self, x):
+        out = self.layers0(x)
+        out = self.layers2(out) + self.layers3(out) + self.layers4(out)
+        out = self.layers1(out)
+        return out * 10
+
+mlp_ava = AES(3840)
+ava_s = torch.load("aesthetic_predictor_ava.pth", map_location=device)
+mlp_ava.load_state_dict(ava_s)
+mlp_ava.to(device)
+mlp_ava.eval()
+
 def prediction_to_tag(prediction, tag_dict, class_num, general_threshold, character_threshold, artist_threshold):
     prediction = prediction.view(class_num)
     predicted_ids = (prediction>= 0.2).nonzero(as_tuple=True)[0].cpu().numpy() + 1
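For context on the hunk above: `AES` maps the backbone's 3840-dim summary embedding through a shared 1280-dim layer and three parallel 640-dim branches, then squashes the summed logit with a sigmoid and scales it to 0-10. A sketch of scoring one embedding, assuming the `AES` class above is in scope and using random weights instead of `aesthetic_predictor_ava.pth`:

```python
import torch

# Sketch only: random weights here; the app loads aesthetic_predictor_ava.pth instead.
head = AES(3840).eval()

embedding = torch.randn(1, 3840)        # stand-in for the backbone's summary vector
with torch.no_grad():
    score = head(embedding)             # sigmoid(sum of three branch logits) * 10

print(score.shape, round(score.item(), 3))  # torch.Size([1, 1]), a value in (0, 10)
```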
@@ -137,17 +191,15 @@ def prediction_to_tag(prediction, tag_dict, class_num, general_threshold, charac
 
     general = dict(sorted(general.items(), key=lambda item: item[1], reverse=True))
     character = dict(sorted(character.items(), key=lambda item: item[1], reverse=True))
+    artist = dict(sorted(artist.items(), key=lambda item: item[1], reverse=True))
 
     if date:
         date = {max(date, key=date.get): date[max(date, key=date.get)]}
     if rating:
         rating = {max(rating, key=rating.get): rating[max(rating, key=rating.get)]}
-    if artist:
-        artist = {max(artist, key=artist.get): artist[max(artist, key=artist.get)]}
 
     return general, character, artist, date, rating
 
-
 def process_image(image, general_threshold, character_threshold, artist_threshold):
     try:
         image = image.convert('RGBA')
@@ -177,7 +229,7 @@ def process_image(image, general_threshold, character_threshold, artist_threshol
         print(f"Error opening image: {e}")
         return
 
-    with torch.no_grad():
+    with torch.no_grad(), torch.autocast('cuda', dtype=torch.bfloat16):
         summary, features = model(pixel_values)
         outputs = summary.to(torch.float32)
 
@@ -195,7 +247,9 @@ def process_image(image, general_threshold, character_threshold, artist_threshol
     artist_tags = artist_[2]
     date = artist_[3]
 
-    combined_tags = {**artist_tags, **character_tags, **general_tags}
+    ava_score = round(mlp_ava(outputs).item(), 3)
+
+    combined_tags = {**character_tags, **general_tags}
 
     tags_list = [tag for tag in combined_tags]
     remove_list = []
@@ -208,12 +262,12 @@ def process_image(image, general_threshold, character_threshold, artist_threshol
 
     tags_str = ", ".join(tags_list).replace("(", "\(").replace(")", "\)")
 
-    return tags_str, artist_tags, character_tags, general_tags, rating, date
+    return tags_str, artist_tags, character_tags, general_tags, rating, date, ava_score
 
 def parse_args() -> argparse.Namespace:
     parser = argparse.ArgumentParser()
     parser.add_argument("--slider-step", type=float, default=0.01)
-    parser.add_argument("--general-threshold", type=float, default=0.5)
+    parser.add_argument("--general-threshold", type=float, default=0.61)
     parser.add_argument("--character-threshold", type=float, default=0.8)
     parser.add_argument("--artist-threshold", type=float, default=0.68)
     return parser.parse_args()
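The inference path above now runs the backbone under both `torch.no_grad()` and bfloat16 autocast, then casts the summary back to float32 before the MLP heads consume it. A self-contained sketch of that pattern with a toy backbone (shapes illustrative; the app itself autocasts on 'cuda'):

```python
import torch
import torch.nn as nn

# Toy stand-in for the real backbone; shapes are illustrative, not the app's.
backbone = nn.Linear(16, 3840)
x = torch.randn(1, 16)

# 'cpu' keeps this sketch runnable anywhere; the app uses 'cuda'
# (bf16 autocast on GPU assumes Ampere or newer hardware).
device_type = "cuda" if torch.cuda.is_available() else "cpu"
backbone = backbone.to(device_type)
x = x.to(device_type)

with torch.no_grad(), torch.autocast(device_type, dtype=torch.bfloat16):
    summary = backbone(x)                # linear layers run in bfloat16 here

outputs = summary.to(torch.float32)      # cast back before the float32 MLP heads
print(summary.dtype, outputs.dtype)      # torch.bfloat16 torch.float32
```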
@@ -226,9 +280,9 @@ def main():
         gr.Markdown(
             value=f"<h1 style='text-align: center; margin-bottom: 1rem'>{TITLE}</h1>"
         )
-        gr.Markdown(value=DESCRIPTION)
         with gr.Row():
             with gr.Column(variant="panel"):
+                submit = gr.Button(value="Submit", variant="primary", size="lg")
                 image = gr.Image(type="pil", image_mode="RGBA", label="Input")
                 with gr.Row():
                     general_threshold = gr.Slider(
@@ -239,7 +293,6 @@ def main():
                         label="General Threshold",
                         scale=3,
                     )
-                with gr.Row():
                     character_threshold = gr.Slider(
                         0,
                         1,
@@ -248,7 +301,6 @@ def main():
                        label="Character Threshold",
                        scale=3,
                    )
-                with gr.Row():
                     artist_threshold = gr.Slider(
                         0,
                         1,
@@ -265,13 +317,16 @@ def main():
                     variant="secondary",
                     size="lg",
                 )
-                submit = gr.Button(value="Submit", variant="primary", size="lg")
+                gr.Markdown(value=DESCRIPTION)
             with gr.Column(variant="panel"):
                 tags_str = gr.Textbox(label="Output")
+                with gr.Row():
+                    ava_score = gr.Textbox(label="Aesthetic Score (AVA)")
+                with gr.Row():
+                    rating = gr.Label(label="Rating")
+                    date = gr.Label(label="Year")
                 artist_tags = gr.Label(label="Artist")
-                character_tags = gr.Label(label="Characters")
-                rating = gr.Label(label="Rating")
-                date = gr.Label(label="Year")
+                character_tags = gr.Label(label="Character")
                 general_tags = gr.Label(label="General")
                 clear.add(
                     [
@@ -280,7 +335,8 @@ def main():
                         general_tags,
                         character_tags,
                         rating,
-                        date
+                        date,
+                        ava_score
                     ]
                 )
 
@@ -292,7 +348,7 @@ def main():
                 character_threshold,
                 artist_threshold
             ],
-            outputs=[tags_str, artist_tags, character_tags, general_tags, rating, date],
+            outputs=[tags_str, artist_tags, character_tags, general_tags, rating, date, ava_score],
         )
 
     demo.queue(max_size=10)
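Gradio maps the tuple returned by the handler onto the `outputs=` list purely by position, which is why `ava_score` was appended to both the `process_image` return and the `submit.click` outputs in the same order. A stripped-down sketch of that contract with a hypothetical handler:

```python
import gradio as gr

# Hypothetical stand-in for process_image: the returned tuple is matched to the
# outputs= list below element by element, in order.
def fake_predict(text):
    return text.upper(), len(text)

with gr.Blocks() as demo:
    inp = gr.Textbox(label="Input")
    upper_box = gr.Textbox(label="Upper")
    length_box = gr.Number(label="Length")
    submit = gr.Button("Submit")
    submit.click(fn=fake_predict, inputs=[inp], outputs=[upper_box, length_box])

# demo.launch()  # uncomment to serve locally
```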
 
cls_predictor.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b0bb58f320b941f20d9c3b9e3af4dc87780d9cf3f9d50be7a72b684028cd7763
+oid sha256:f5a6373053dad15af8b8cc2a6830bd04f67d35ff04acc5f071c34cb5d8c05305
 size 54599508