"""Gradio demo: tag an image with a WD tagger model and build a prompt string.

The module loads the classifier once at import time, exposes helpers that
split the model's scores into rating / character / general tags, optionally
converts danbooru tags to e621 tags, and wires everything into a Gradio UI.
"""

import re
from functools import lru_cache

from PIL import Image
import torch
from transformers import (
    AutoImageProcessor,
    AutoModelForImageClassification,
)
import gradio as gr
import spaces  # ZERO GPU

MODEL_NAMES = ["p1atdev/wd-swinv2-tagger-v3-hf"]
MODEL_NAME = MODEL_NAMES[0]

model = AutoModelForImageClassification.from_pretrained(
    MODEL_NAME,
)
model.to("cuda" if torch.cuda.is_available() else "cpu")
processor = AutoImageProcessor.from_pretrained(MODEL_NAME, trust_remote_code=True)

# Data files read lazily (and only once) by the helpers below.
_SERIES_FILE = "characterfull.txt"
_E621_FILE = "danbooru_e621.csv"

# Matches the escaped parentheses ("\(" and "\)") that are stripped from
# series names in the character file.
_ESCAPED_PAREN_RE = re.compile(r"\\[()]")
# Runs of word characters / spaces; each run is a candidate tag to translate.
_TAG_WORD_RE = re.compile(r"[\w ]+")

_RATING_PREFIX = "rating:"
_CHARACTER_PREFIX = "character:"


# ref: https://qiita.com/tregu148/items/fccccbbc47d966dd2fc2
def gradio_copy_text(_text: str):
    """Show a "Copied!" toast.

    The clipboard write itself is performed client-side by ``COPY_ACTION_JS``;
    the text argument is accepted only because the click event passes it.
    """
    gr.Info("Copied!")


COPY_ACTION_JS = """\
(inputs, _outputs) => {
  // inputs is the string value of the input_text
  if (inputs.trim() !== "") {
    navigator.clipboard.writeText(inputs);
  }
}"""


def _people_tag(noun: str, minimum: int = 1, maximum: int = 5):
    """Build the people-count tags for *noun*.

    Returns ``1{noun}``, then ``{n}{noun}s`` for ``n`` in
    ``minimum + 1 .. maximum``, then ``{maximum + 1}+{noun}s``.
    Note that the singular entry is always ``1{noun}`` regardless of
    *minimum*.
    """
    return (
        [f"1{noun}"]
        + [f"{num}{noun}s" for num in range(minimum + 1, maximum + 1)]
        + [f"{maximum+1}+{noun}s"]
    )


PEOPLE_TAGS = (
    _people_tag("girl") + _people_tag("boy") + _people_tag("other") + ["no humans"]
)
# Set view of PEOPLE_TAGS for O(1) membership tests.
_PEOPLE_TAG_SET = frozenset(PEOPLE_TAGS)

RATING_MAP = {
    "general": "safe",
    "sensitive": "sensitive",
    "questionable": "nsfw",
    "explicit": "explicit, nsfw",
}
RATING_MAP_E621 = {
    "general": "rating_safe",
    "sensitive": "rating_safe",
    "questionable": "rating_questionable",
    "explicit": "rating_explicit",
}

DESCRIPTION_MD = """
# WD Tagger with 🤗 transformers

Currently supports the following model(s):

- [p1atdev/wd-swinv2-tagger-v3-hf](https://huggingface.co/p1atdev/wd-swinv2-tagger-v3-hf)
""".strip()


@lru_cache(maxsize=1)
def _load_series_dict() -> dict[str, str]:
    """Parse the character file into a ``character name -> series`` mapping.

    Each usable line has at least three ``", "``-separated fields; the
    second-to-last is the character name and the last is the series.
    Backslashes are removed from the name, and a trailing parenthesised
    suffix preceded by a space is dropped from it. Escaped parentheses are
    removed from the series. The result is cached, so the file is read only
    once per process; treat the returned dict as read-only.
    """
    series_dict: dict[str, str] = {}
    with open(_SERIES_FILE, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(", ")
            if len(parts) < 3:
                continue
            name = parts[-2].replace("\\", "")
            if name.endswith(")"):
                base = name.rsplit("(", 1)[0]
                # Only strip the suffix when it is separated by a space,
                # i.e. "name (series)" -> "name".
                if base.endswith(" "):
                    name = base[:-1]
            series_dict[name] = _ESCAPED_PAREN_RE.sub("", parts[-1])
    return series_dict


def character_list_to_series_list(character_list):
    """Return the series tags associated with the given character tags.

    For a character tag that ends with ``)``, the series is the text of the
    last parenthesised part (with ``)`` removed). Otherwise the series is
    looked up in the character file. Characters without a known series are
    skipped, and each series appears at most once, in first-seen order.

    Raises:
        FileNotFoundError: if the character file is missing (only when
            *character_list* is non-empty).
    """
    if not character_list:
        return []

    series_dict = _load_series_dict()
    output_series_tag = []
    for tag in character_list:
        if tag.endswith(")"):
            series_tag = tag.split("(")[-1].replace(")", "")
        else:
            series_tag = series_dict.get(tag, "")
        if series_tag and series_tag not in output_series_tag:
            output_series_tag.append(series_tag)
    return output_series_tag


@lru_cache(maxsize=1)
def get_e621_dict():
    """Load the ``danbooru tag -> e621 tag`` mapping from the CSV file.

    The first comma-separated field of each line is the key and the second
    is the value; lines with fewer than two fields are ignored. The result
    is cached, so the file is read only once per process; treat the returned
    dict as read-only.

    Raises:
        FileNotFoundError: if the CSV file is missing.
    """
    e621_dict = {}
    with open(_E621_FILE, "r", encoding="utf-8") as f:
        for line in f:
            parts = line.strip().split(",")
            if len(parts) < 2:
                # Blank or malformed row: nothing to map.
                continue
            e621_dict[parts[0]] = parts[1]
    return e621_dict


def danbooru_to_e621(dtag, e621_dict):
    """Translate a danbooru tag to its e621 equivalent where one is known.

    Up to two runs of word characters/spaces in *dtag* are looked up in
    *e621_dict* (after stripping and replacing ``_`` with a space). A run
    with no non-empty mapping is left unchanged.
    """

    def d_to_e(match):
        word = match.group(0)
        return e621_dict.get(word.strip().replace("_", " "), "") or word

    # `count` is passed by keyword: positional use is deprecated in 3.13.
    return _TAG_WORD_RE.sub(d_to_e, dtag, count=2)


def postprocess_results(
    results: dict[str, float], general_threshold: float, character_threshold: float
) -> tuple[dict[str, float], dict[str, float], dict[str, float]]:
    """Split scored labels into rating, character and general tags.

    Labels are ordered by descending score. Labels starting with
    ``rating:`` / ``character:`` go to the respective dict with the prefix
    removed; everything else is a general tag. Character and general tags
    below their threshold are dropped; ratings are never filtered.

    Returns:
        ``(rating, character, general)`` dicts mapping tag to score.
    """
    ordered = sorted(results.items(), key=lambda item: item[1], reverse=True)

    rating = {}
    character = {}
    general = {}

    for label, score in ordered:
        if label.startswith(_RATING_PREFIX):
            rating[label[len(_RATING_PREFIX):]] = score
        elif label.startswith(_CHARACTER_PREFIX):
            if score >= character_threshold:
                character[label[len(_CHARACTER_PREFIX):]] = score
        elif score >= general_threshold:
            general[label] = score

    return rating, character, general


def animagine_prompt(
    rating: list[str], character: list[str], general: list[str], tag_type
):
    """Assemble the final comma-separated prompt.

    Order: people-count tags, character tags, series tags, remaining general
    tags, then the mapped rating tag. With ``tag_type == "e621"`` the general
    tags are converted with :func:`danbooru_to_e621` and the e621 rating map
    is used; any other value uses the danbooru rating map and leaves tags
    untouched.

    Args:
        rating: rating names, best first; only the first entry is used.
            An empty list yields a prompt without a rating tag.
        character: character tags.
        general: general tags.
        tag_type: ``"e621"`` to convert, anything else for danbooru output.

    Raises:
        KeyError: if the first rating is not a key of the rating map.
    """
    is_e621 = tag_type == "e621"
    rating_map = RATING_MAP_E621 if is_e621 else RATING_MAP
    rating_tags = [rating_map[rating[0]]] if rating else []

    # The conversion table is only needed (and only loaded) in e621 mode.
    e621_dict = get_e621_dict() if is_e621 else None

    people_tags: list[str] = []
    other_tags: list[str] = []
    for tag in general:
        if is_e621:
            tag = danbooru_to_e621(tag, e621_dict)
        if tag in _PEOPLE_TAG_SET:
            people_tags.append(tag)
        else:
            other_tags.append(tag)

    output_series_tag = character_list_to_series_list(character)

    all_tags = people_tags + character + output_series_tag + other_tags + rating_tags

    return ", ".join(all_tags)


@spaces.GPU(enable_queue=True)
def predict_tags(
    image: Image.Image,
    general_threshold: float = 0.3,
    character_threshold: float = 0.8,
    tag_type: str = "danbooru",
):
    """Run the tagger on *image* and build the outputs for the UI.

    Returns:
        ``(rating, character, general, prompt, update)`` where the first
        three are tag-to-score dicts, *prompt* is the assembled prompt
        string and *update* makes the receiving component interactive.

    Raises:
        gr.Error: if no image was provided.
    """
    if image is None:
        raise gr.Error("Please provide an input image.")

    inputs = processor.preprocess(image, return_tensors="pt")

    # Inference only: no autograd bookkeeping needed.
    with torch.inference_mode():
        outputs = model(**inputs.to(model.device, model.dtype))

    # Sigmoid over the first item's logits; one bulk transfer to Python floats.
    probs = torch.sigmoid(outputs.logits[0]).float().tolist()

    results = {model.config.id2label[i]: prob for i, prob in enumerate(probs)}

    # rating, character, general
    rating, character, general = postprocess_results(
        results, general_threshold, character_threshold
    )

    prompt = animagine_prompt(
        list(rating.keys()), list(character.keys()), list(general.keys()), tag_type
    )

    return rating, character, general, prompt, gr.update(interactive=True,)


def demo():
    """Build and return the Gradio Blocks UI (not yet launched)."""
    with gr.Blocks() as ui:
        gr.Markdown(DESCRIPTION_MD)

        with gr.Row():
            with gr.Column():
                input_image = gr.Image(label="Input image", type="pil")

                with gr.Group():
                    general_threshold = gr.Slider(
                        label="Threshold",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.3,
                        step=0.01,
                        interactive=True,
                    )
                    character_threshold = gr.Slider(
                        label="Character threshold",
                        minimum=0.0,
                        maximum=1.0,
                        value=0.8,
                        step=0.01,
                        interactive=True,
                    )
                    tag_type = gr.Radio(
                        label="Output tag conversion",
                        info="danbooru for Animagine, e621 for Pony.",
                        choices=["danbooru", "e621"],
                        value="danbooru",
                    )

                # Displayed for information only; not wired to any event.
                _model_radio = gr.Dropdown(
                    choices=MODEL_NAMES,
                    label="Model",
                    value=MODEL_NAMES[0],
                    interactive=True,
                )

                start_btn = gr.Button(value="Start", variant="primary")

            with gr.Column():
                with gr.Group():
                    prompt_text = gr.TextArea(label="Prompt", interactive=False)
                    # Enabled by predict_tags once a prompt exists.
                    copy_btn = gr.Button(value="Copy to clipboard", interactive=False)

                rating_tags_label = gr.Label(label="Rating tags")
                character_tags_label = gr.Label(label="Character tags")
                general_tags_label = gr.Label(label="General tags")

        start_btn.click(
            predict_tags,
            inputs=[input_image, general_threshold, character_threshold, tag_type],
            outputs=[
                rating_tags_label,
                character_tags_label,
                general_tags_label,
                prompt_text,
                copy_btn,
            ],
        )
        copy_btn.click(gradio_copy_text, inputs=[prompt_text], js=COPY_ACTION_JS)

    return ui


if __name__ == "__main__":
    demo().queue().launch()