way too much effort for emojis
Browse files
txt2emoji
CHANGED
@@ -18,6 +18,7 @@ from nltk.tokenize import word_tokenize
|
|
18 |
from emoji import EMOJI_DATA
|
19 |
import argparse
|
20 |
from pathlib import Path
|
|
|
21 |
|
22 |
# Download required NLTK data (only needed once)
|
23 |
nltk.download('punkt', quiet=True)
|
@@ -47,19 +48,135 @@ def text_to_emojis(text):
|
|
47 |
# Create emoji mapping and variations
|
48 |
emoji_map, emoji_variations = get_emoji_mapping()
|
49 |
|
|
|
|
|
|
|
50 |
# Emojis to exclude
|
51 |
-
excluded_emojis = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Words to exclude from emoji conversion
|
54 |
-
excluded_words = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
# Track used emojis and their variations
|
57 |
used_emojis = set()
|
58 |
|
59 |
# Additional manual mappings for common words
|
60 |
custom_mappings = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
'cum': 'π¦',
|
62 |
'love': 'β€οΈ',
|
|
|
|
|
|
|
63 |
'cat': 'πΊ',
|
64 |
'cats': 'πΊ',
|
65 |
'dog': 'πΆ',
|
@@ -105,9 +222,8 @@ def text_to_emojis(text):
|
|
105 |
|
106 |
# Process each token
|
107 |
for token in tokens:
|
108 |
-
# Skip excluded words
|
109 |
-
if token in excluded_words:
|
110 |
-
explanations.append(f"'{token}' β (skipped - excluded word)")
|
111 |
continue
|
112 |
|
113 |
# First check custom mappings
|
@@ -117,8 +233,8 @@ def text_to_emojis(text):
|
|
117 |
found_emojis.append(emoji)
|
118 |
used_emojis.add(emoji)
|
119 |
explanations.append(f"'{token}' β {emoji} (custom mapping)")
|
120 |
-
else:
|
121 |
-
|
122 |
continue
|
123 |
|
124 |
# Then check emoji mapping
|
@@ -133,10 +249,10 @@ def text_to_emojis(text):
|
|
133 |
break
|
134 |
if not found_match:
|
135 |
available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
|
136 |
-
if available_emojis:
|
137 |
-
|
138 |
-
else:
|
139 |
-
|
140 |
else:
|
141 |
explanations.append(f"'{token}' β (no matching emoji found)")
|
142 |
|
|
|
18 |
from emoji import EMOJI_DATA
|
19 |
import argparse
|
20 |
from pathlib import Path
|
21 |
+
import re
|
22 |
|
23 |
# Download required NLTK data (only needed once)
|
24 |
nltk.download('punkt', quiet=True)
|
|
|
48 |
# Create emoji mapping and variations
|
49 |
emoji_map, emoji_variations = get_emoji_mapping()
|
50 |
|
51 |
+
# Regex pattern to match any token containing numbers
|
52 |
+
number_pattern = re.compile(r'.*\d+.*')
|
53 |
+
|
54 |
# Emojis to exclude
|
55 |
+
excluded_emojis = {
|
56 |
+
'πΆ',
|
57 |
+
'β',
|
58 |
+
'π·',
|
59 |
+
'πΉ',
|
60 |
+
'πΈ',
|
61 |
+
'πΊ',
|
62 |
+
'π»',
|
63 |
+
'π΄',
|
64 |
+
'π΅',
|
65 |
+
'πΌ',
|
66 |
+
'πΎ',
|
67 |
+
'π΅π¬',
|
68 |
+
'π',
|
69 |
+
'π²',
|
70 |
+
'β'
|
71 |
+
}
|
72 |
|
73 |
# Words to exclude from emoji conversion
|
74 |
+
excluded_words = {
|
75 |
+
'(',
|
76 |
+
')',
|
77 |
+
'purple',
|
78 |
+
'abdominal',
|
79 |
+
'penetration',
|
80 |
+
'feral',
|
81 |
+
'body',
|
82 |
+
'nude',
|
83 |
+
'anthro',
|
84 |
+
'big',
|
85 |
+
'small',
|
86 |
+
'the',
|
87 |
+
'a',
|
88 |
+
'an',
|
89 |
+
'and',
|
90 |
+
'or',
|
91 |
+
'but',
|
92 |
+
'if',
|
93 |
+
'then',
|
94 |
+
'because',
|
95 |
+
'as',
|
96 |
+
'until',
|
97 |
+
'while',
|
98 |
+
',',
|
99 |
+
'hi',
|
100 |
+
'res',
|
101 |
+
'pussy',
|
102 |
+
'penetrated',
|
103 |
+
'equine',
|
104 |
+
'felid',
|
105 |
+
'feline',
|
106 |
+
'equid',
|
107 |
+
'genital',
|
108 |
+
'genitals',
|
109 |
+
'penetrating',
|
110 |
+
'medial',
|
111 |
+
'ring',
|
112 |
+
'inside',
|
113 |
+
'duo',
|
114 |
+
'solo',
|
115 |
+
'in',
|
116 |
+
'hair',
|
117 |
+
'andromorph',
|
118 |
+
'from',
|
119 |
+
'behind',
|
120 |
+
'position',
|
121 |
+
'pantherine',
|
122 |
+
'animal',
|
123 |
+
'brown',
|
124 |
+
'sub',
|
125 |
+
'dom',
|
126 |
+
'explicit',
|
127 |
+
'black',
|
128 |
+
'bulge',
|
129 |
+
'dominant',
|
130 |
+
'kousen',
|
131 |
+
'rendan',
|
132 |
+
'genitalia',
|
133 |
+
'tan',
|
134 |
+
'simple',
|
135 |
+
'media',
|
136 |
+
'vaginal',
|
137 |
+
'red',
|
138 |
+
'pecs',
|
139 |
+
'navel',
|
140 |
+
'background',
|
141 |
+
'pubes',
|
142 |
+
}
|
143 |
|
144 |
# Track used emojis and their variations
|
145 |
used_emojis = set()
|
146 |
|
147 |
# Additional manual mappings for common words
|
148 |
custom_mappings = {
|
149 |
+
'markings': 'π·οΈ',
|
150 |
+
'sweat': 'π§',
|
151 |
+
'toes': 'π£',
|
152 |
+
'teeth': 'π¦·',
|
153 |
+
'fingering': 'π',
|
154 |
+
'blush': 'π',
|
155 |
+
'male': 'βοΈ',
|
156 |
+
'tiger': 'π―',
|
157 |
+
'fluids': 'π§',
|
158 |
+
'wolf': 'πΊ',
|
159 |
+
'dog': 'πΆ',
|
160 |
+
'female': 'βοΈ',
|
161 |
+
'intersex': 'β§οΈ',
|
162 |
+
'muscular': 'πͺ',
|
163 |
+
'wheelbarrow': 'π',
|
164 |
+
'sex': 'π',
|
165 |
+
'size': 'π',
|
166 |
+
'difference': 'π’',
|
167 |
+
'penis': 'π±',
|
168 |
+
'paws': 'πΎ',
|
169 |
+
'pawpads': 'πΎ',
|
170 |
+
'hindpaw': 'πΎ',
|
171 |
+
'fur': 'π§₯',
|
172 |
+
'horse': 'π΄',
|
173 |
+
#'pussy': '',
|
174 |
+
'ejaculation': 'π¦',
|
175 |
'cum': 'π¦',
|
176 |
'love': 'β€οΈ',
|
177 |
+
'smaller': 'π½',
|
178 |
+
'bigger': 'πΌ',
|
179 |
+
'larger': 'πΌ',
|
180 |
'cat': 'πΊ',
|
181 |
'cats': 'πΊ',
|
182 |
'dog': 'πΆ',
|
|
|
222 |
|
223 |
# Process each token
|
224 |
for token in tokens:
|
225 |
+
# Skip excluded words and anything containing numbers
|
226 |
+
if token in excluded_words or number_pattern.match(token):
|
|
|
227 |
continue
|
228 |
|
229 |
# First check custom mappings
|
|
|
233 |
found_emojis.append(emoji)
|
234 |
used_emojis.add(emoji)
|
235 |
explanations.append(f"'{token}' β {emoji} (custom mapping)")
|
236 |
+
#else:
|
237 |
+
# explanations.append(f"'{token}' β (skipped - emoji {emoji} already used)")
|
238 |
continue
|
239 |
|
240 |
# Then check emoji mapping
|
|
|
249 |
break
|
250 |
if not found_match:
|
251 |
available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
|
252 |
+
#if available_emojis:
|
253 |
+
# explanations.append(f"'{token}' β (skipped - all matching emojis {', '.join(available_emojis)} already used)")
|
254 |
+
#else:
|
255 |
+
# explanations.append(f"'{token}' β (skipped - all matching emojis are excluded)")
|
256 |
else:
|
257 |
explanations.append(f"'{token}' β (no matching emoji found)")
|
258 |
|