k4d3 committed
Commit ee41534
Parent: f8b95db

joy updates (we haven't tested yet) and remove_extra_whitespace

Files changed (2):
  1. joy +143 -24
  2. remove_extra_whitespace +60 -0
joy CHANGED
@@ -18,6 +18,7 @@ import os
 import argparse
 import re
 import random
+import math
 from pathlib import Path
 from typing import List, Tuple, Dict
 from PIL import Image
@@ -199,6 +200,33 @@ class ImageAdapter(nn.Module):
             torch.tensor([2], device=self.other_tokens.weight.device)
         ).squeeze(0)

+STOP_WORDS: set[str] = {
+    "the", "a", "an", "and", "or", "but", "in", "on", "at", "to", "for",
+    "of", "with", "by", "from", "up", "down", "is", "are", "was", "were",
+    "be", "been", "being", "have", "has", "had", "do", "does", "did",
+    "will", "would", "shall", "should", "can", "could", "may", "might",
+    "must", "ought", "i", "you", "he", "she", "it", "we", "they", "them",
+    "their", "this", "that", "these", "those", "am", "is", "are", "was",
+    "were", "be", "been", "being", "have", "has", "had", "do", "does",
+    "did", "will", "would", "shall", "should", "can", "could", "may",
+    "might", "must", "ought", "i'm", "you're", "he's", "she's", "it's",
+    "we're", "they're", "i've", "you've", "we've", "they've", "i'd",
+    "you'd", "he'd", "she'd", "we'd", "they'd", "i'll", "you'll",
+    "he'll", "she'll", "we'll", "they'll", "isn't", "aren't", "wasn't",
+    "weren't", "hasn't", "haven't", "hadn't", "doesn't", "don't",
+    "didn't", "won't", "wouldn't", "shan't", "shouldn't", "can't",
+    "cannot", "couldn't", "mustn't", "let's", "that's", "who's",
+    "what's", "here's", "there's", "when's", "where's", "why's", "how's",
+    "a", "an", "the", "and", "but", "if", "or", "because", "as", "until",
+    "while", "of", "at", "by", "for", "with", "about", "against",
+    "between", "into", "through", "during", "before", "after", "above",
+    "below", "to", "from", "up", "down", "in", "out", "on", "off", "over",
+    "under", "again", "further", "then", "once", "here", "there", "when",
+    "where", "why", "how", "all", "any", "both", "each", "few", "more",
+    "most", "other", "some", "such", "no", "nor", "not", "only", "own",
+    "same", "so", "than", "too", "very"
+}
+

 class JoyCaptionModel:
     """
@@ -302,11 +330,20 @@ class JoyCaptionModel:
         caption_type: str,
         caption_tone: str,
         caption_length: str | int,
-        custom_prompt: str | None = None,
-    ) -> str:
+        custom_prompt: str | None = None
+    ) -> Tuple[str, float, float]:
         """
-        Process an input image and generate a caption based on specified
-        parameters.
+        Process the input image and generate a caption.
+
+        Args:
+            input_image (Image.Image): The input image to caption.
+            caption_type (str): The type of caption to generate.
+            caption_tone (str): The tone of the caption.
+            caption_length (str | int): The desired length of the caption.
+            custom_prompt (str | None): A custom prompt for caption generation.
+
+        Returns:
+            Tuple[str, float, float]: A tuple containing the generated caption, its entropy, and its perplexity.
         """
         torch.cuda.empty_cache()

@@ -326,12 +363,18 @@ class JoyCaptionModel:
             embedded_images, prompt
         )

-        generate_ids = self._generate_caption(inputs_embeds,
-                                              input_ids,
-                                              attention_mask)
+        generate_ids = self._generate_caption(inputs_embeds, input_ids, attention_mask)
         caption = self._decode_caption(generate_ids, input_ids)

-        return caption.strip()
+        # Calculate entropy
+        token_ids = generate_ids[0].tolist()
+        entropy = self._calculate_entropy(token_ids)
+
+        # Calculate perplexity
+        loss = self._calculate_perplexity(generate_ids, input_ids)
+        perplexity = math.exp(loss)  # perplexity = exp(mean cross-entropy loss)
+
+        return caption.strip(), entropy, perplexity

     def generate_valid_caption(
         self,
@@ -340,29 +383,62 @@
         caption_tone: str,
         caption_length: str | int,
         custom_prompt: str | None = None,
+        *,
+        limited_words: Dict[str, int] = {"fluffy": 2},
+        min_sentence_count: int = 3,
+        max_word_repetitions: int = 5,
+        min_entropy: float = 1.75,
+        max_perplexity: float = 100.0,
+        stop_words: set[str] = STOP_WORDS
     ) -> str:
         """
-        Generate a valid caption, retrying if the caption contains only special
-        characters, does not end with a period, exclamation mark, or question
-        mark, contains the word fluffy more than once, repeats any word longer
-        than 4 characters multiple times, or contains only one sentence.
+        Generate a valid caption, retrying if certain conditions are not met.
+
+        Args:
+            input_image (Image.Image): The input image to caption.
+            caption_type (str): The type of caption to generate.
+            caption_tone (str): The tone of the caption.
+            caption_length (str | int): The desired length of the caption.
+            custom_prompt (str | None): A custom prompt for caption generation.
+            limited_words (Dict[str, int]): Dictionary of words with their maximum allowed occurrences. Default is {"fluffy": 2}.
+            min_sentence_count (int): Minimum required number of sentences. Default is 3.
+            max_word_repetitions (int): Maximum allowed repetitions for words longer than 4 characters. Default is 5.
+            min_entropy (float): Minimum required entropy of the caption. Default is 1.75.
+            max_perplexity (float): Maximum allowed perplexity of the caption. Default is 100.0.
+            stop_words (set[str]): Set of stop words to exclude from repetition checks. Default is STOP_WORDS.
+
+        Returns:
+            str: A valid caption meeting all specified criteria.
         """
         while True:
-            caption = self.process_image(
+            caption, entropy, perplexity = self.process_image(
                 input_image, caption_type, caption_tone,
                 caption_length, custom_prompt
             )
-            words = re.findall(r'\b\w{5,}\b', caption.lower())
-            word_counts = {word: words.count(word) for word in set(words)}
+            words = re.findall(r'\b\w+\b', caption.lower())
+            word_counts = {word: words.count(word) for word in set(words) if word not in stop_words}
             sentence_count = len(re.findall(r'[.!?]', caption))
-
-            if (re.search(r'\w', caption) and
-                    caption[-1] in {'.', '!', '?'} and
-                    caption.lower().count('fluffy') <= 1 and
-                    all(count == 1 for count in word_counts.values()) and
-                    sentence_count > 1):
+
+            if not re.search(r'\w', caption):
+                print(f"Retrying: Caption contains only special characters.\nCaption: {caption!r}")
+            elif caption[-1] not in {'.', '!', '?'}:
+                print(f"Retrying: Caption does not end with proper punctuation.\nCaption: {caption!r}")
+            elif any(caption.lower().count(word) > max_count for word, max_count in limited_words.items()):
+                exceeded_words = [f"{word} ({caption.lower().count(word)}/{max_count})"
+                                  for word, max_count in limited_words.items()
+                                  if caption.lower().count(word) > max_count]
+                print(f"Retrying: Limited words exceeded: {', '.join(exceeded_words)}.\nCaption: {caption!r}")
+            elif any(count > max_word_repetitions for word, count in word_counts.items() if len(word) > 4):
+                repeated_words = [word for word, count in word_counts.items() if count > max_word_repetitions and len(word) > 4]
+                print(f"Retrying: Words repeated more than {max_word_repetitions} times: {', '.join(repeated_words)}.\nCaption: {caption!r}")
+            elif sentence_count < min_sentence_count:
+                print(f"Retrying: Only {sentence_count} sentences (min: {min_sentence_count}).\nCaption: {caption!r}")
+            elif entropy < min_entropy:
+                print(f"Retrying: Low entropy ({entropy:.2f} < {min_entropy}).\nCaption: {caption!r}")
+            elif perplexity > max_perplexity:
+                print(f"Retrying: High perplexity ({perplexity:.2f} > {max_perplexity}).\nCaption: {caption!r}")
+            else:
                 return caption
-            print(f"Generated caption is invalid. Retrying...\nCaption: {caption!r}")

     def _get_prompt_string(self, caption_type, caption_tone, caption_length):
         length = None if caption_length == "any" else caption_length
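The retry loop above rejects a caption for one reason at a time and prints that reason before regenerating. The purely textual checks can be exercised without loading the model. The sketch below is illustrative and not part of the commit: the helper name `caption_problems`, the abbreviated stop-word set, and the sample caption are invented, and the entropy/perplexity gates are omitted because they require the language model.

```python
import re

# Abbreviated stand-in; the commit's STOP_WORDS set is much larger.
STOP_WORDS = {"the", "a", "an", "and", "is", "are", "of", "to", "in", "on"}

def caption_problems(caption: str,
                     limited_words: dict[str, int] = {"fluffy": 2},
                     min_sentence_count: int = 3,
                     max_word_repetitions: int = 5,
                     stop_words: set[str] = STOP_WORDS) -> list[str]:
    """Return the reasons a caption would be rejected (an empty list means it passes)."""
    problems = []
    words = re.findall(r'\b\w+\b', caption.lower())
    word_counts = {w: words.count(w) for w in set(words) if w not in stop_words}
    sentence_count = len(re.findall(r'[.!?]', caption))

    if not re.search(r'\w', caption):
        problems.append("contains only special characters")
    elif caption[-1] not in {'.', '!', '?'}:
        problems.append("does not end with sentence punctuation")
    for word, max_count in limited_words.items():
        if caption.lower().count(word) > max_count:
            problems.append(f"'{word}' appears more than {max_count} times")
    repeated = [w for w, c in word_counts.items() if len(w) > 4 and c > max_word_repetitions]
    if repeated:
        problems.append(f"words repeated more than {max_word_repetitions} times: {', '.join(repeated)}")
    if sentence_count < min_sentence_count:
        problems.append(f"only {sentence_count} sentence(s), need {min_sentence_count}")
    return problems

print(caption_problems("A fluffy dog. A fluffy cat. A fluffy fox."))
# ["'fluffy' appears more than 2 times"]
```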
@@ -498,6 +574,49 @@ class JoyCaptionModel:
         )[0]
         return caption

+    def _calculate_entropy(self, token_ids: List[int]) -> float:
+        """
+        Calculate the entropy of a sequence of token IDs.
+
+        Args:
+            token_ids (List[int]): List of token IDs.
+
+        Returns:
+            float: Entropy of the token sequence.
+        """
+        token_counts = {}
+        total_tokens = len(token_ids)
+
+        for token_id in token_ids:
+            token_counts[token_id] = token_counts.get(token_id, 0) + 1
+
+        entropy = 0
+        for count in token_counts.values():
+            probability = count / total_tokens
+            entropy -= probability * math.log2(probability)
+
+        return entropy
+
+    def _calculate_perplexity(self, generate_ids, input_ids):
+        """
+        Calculate the mean cross-entropy loss used to derive the perplexity of the generated caption.
+
+        Args:
+            generate_ids (torch.Tensor): Generated token IDs.
+            input_ids (torch.Tensor): Input token IDs.
+
+        Returns:
+            float: Mean cross-entropy loss; perplexity is exp() of this value.
+        """
+        with torch.no_grad():
+            outputs = self.text_model(
+                input_ids=input_ids,
+                labels=generate_ids,
+                output_hidden_states=True,
+            )
+        loss = outputs.loss
+        return loss.item()
+

 def main():
     """
@@ -619,7 +738,7 @@ def main():
             args, image_path, tagset_normalizer
         )

-        print(f"Custom prompt: {custom_prompt}")
+        print(f"\nCaptioning {image_path}...\nCustom prompt: {custom_prompt}")

         caption = joy_caption_model.generate_valid_caption(
             input_image,
@@ -849,7 +968,7 @@ def prompt_from_tags(args, image_path: Path,
     custom_prompt = ' '.join(s for s in [
         "Write a descriptive caption for this image",
         artist_txt, species_txt, character_txt, copyright_txt,
-        "in a formal tone. Use these tags to construct your caption:",
+        "in a formal tone. Limit yourself to two paragraphs, avoid repeating yourself and think before you type anything. Use these tags to construct your caption:",
         tag_string,
     ] if s)
     return custom_prompt
 
remove_extra_whitespace ADDED
@@ -0,0 +1,60 @@
+#!/usr/bin/env python
+# -*- coding: utf-8 -*-
+
+"""
+Collapse runs of spaces into a single space and runs of newline characters into a single
+newline in every *.caption and *.txt file under a target directory, searched recursively.
+If no target directory is provided as an argument, the current directory is processed.
+
+Usage:
+    python remove_extra_whitespace [target_directory]
+
+Args:
+    target_directory (str, optional): The path to the target directory. If not provided, the current directory is used.
+"""
+
+import os
+import sys
+import glob
+
+def remove_extra_spaces_and_newlines(file_path):
+    """
+    Collapse runs of spaces into a single space and runs of newlines into a single
+    newline in the given file. Only redundant whitespace is removed, never text.
+
+    Args:
+        file_path (str): The path to the file to be processed.
+    """
+    with open(file_path, 'r', encoding='utf-8') as file:
+        content = file.read()
+
+    # Collapse runs of spaces/tabs within each line to a single space (newlines survive)
+    content = '\n'.join(' '.join(line.split()) for line in content.split('\n'))
+
+    # Drop the now-empty lines so runs of newlines collapse to a single newline
+    content = '\n'.join(line for line in content.split('\n') if line)
+
+    with open(file_path, 'w', encoding='utf-8') as file:
+        file.write(content)
+
+def process_files_in_directory(directory):
+    """
+    Process all *.caption and *.txt files in the given directory recursively,
+    collapsing extra spaces and newlines in each file.
+
+    Args:
+        directory (str): The path to the directory to be processed.
+    """
+    for file_path in glob.glob(os.path.join(directory, '**', '*.caption'), recursive=True):
+        remove_extra_spaces_and_newlines(file_path)
+    for file_path in glob.glob(os.path.join(directory, '**', '*.txt'), recursive=True):
+        remove_extra_spaces_and_newlines(file_path)
+
+if __name__ == "__main__":
+    if len(sys.argv) > 1:
+        target_directory = sys.argv[1]
+    else:
+        target_directory = os.getcwd()
+
+    process_files_in_directory(target_directory)
+
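For reference, a hypothetical before/after of the normalization the script performs; the sample string is invented and the two expressions mirror the body of `remove_extra_spaces_and_newlines`.

```python
before = "a  red   fox,\tsitting\n\n\n  in tall   grass \n"

# Collapse space/tab runs within each line, then drop the now-empty lines.
after = '\n'.join(' '.join(line.split()) for line in before.split('\n'))
after = '\n'.join(line for line in after.split('\n') if line)

print(repr(after))  # 'a red fox, sitting\nin tall grass'

# Typical invocation over a dataset directory (path shown is an example):
#   python remove_extra_whitespace /path/to/dataset
```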