way too much effort for emojis
Browse files
txt2emoji
CHANGED
@@ -18,6 +18,7 @@ from nltk.tokenize import word_tokenize
|
|
18 |
from emoji import EMOJI_DATA
|
19 |
import argparse
|
20 |
from pathlib import Path
|
|
|
21 |
|
22 |
# Download required NLTK data (only needed once)
|
23 |
nltk.download('punkt', quiet=True)
|
@@ -47,19 +48,135 @@ def text_to_emojis(text):
|
|
47 |
# Create emoji mapping and variations
|
48 |
emoji_map, emoji_variations = get_emoji_mapping()
|
49 |
|
|
|
|
|
|
|
50 |
# Emojis to exclude
|
51 |
-
excluded_emojis = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
52 |
|
53 |
# Words to exclude from emoji conversion
|
54 |
-
excluded_words = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
55 |
|
56 |
# Track used emojis and their variations
|
57 |
used_emojis = set()
|
58 |
|
59 |
# Additional manual mappings for common words
|
60 |
custom_mappings = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
61 |
'cum': 'π¦',
|
62 |
'love': 'β€οΈ',
|
|
|
|
|
|
|
63 |
'cat': 'πΊ',
|
64 |
'cats': 'πΊ',
|
65 |
'dog': 'πΆ',
|
@@ -105,9 +222,8 @@ def text_to_emojis(text):
|
|
105 |
|
106 |
# Process each token
|
107 |
for token in tokens:
|
108 |
-
# Skip excluded words
|
109 |
-
if token in excluded_words:
|
110 |
-
explanations.append(f"'{token}' β (skipped - excluded word)")
|
111 |
continue
|
112 |
|
113 |
# First check custom mappings
|
@@ -117,8 +233,8 @@ def text_to_emojis(text):
|
|
117 |
found_emojis.append(emoji)
|
118 |
used_emojis.add(emoji)
|
119 |
explanations.append(f"'{token}' β {emoji} (custom mapping)")
|
120 |
-
else:
|
121 |
-
|
122 |
continue
|
123 |
|
124 |
# Then check emoji mapping
|
@@ -133,10 +249,10 @@ def text_to_emojis(text):
|
|
133 |
break
|
134 |
if not found_match:
|
135 |
available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
|
136 |
-
if available_emojis:
|
137 |
-
|
138 |
-
else:
|
139 |
-
|
140 |
else:
|
141 |
explanations.append(f"'{token}' β (no matching emoji found)")
|
142 |
|
|
|
18 |
from emoji import EMOJI_DATA
|
19 |
import argparse
|
20 |
from pathlib import Path
|
21 |
+
import re
|
22 |
|
23 |
# Download required NLTK data (only needed once)
|
24 |
nltk.download('punkt', quiet=True)
|
|
|
48 |
# Create emoji mapping and variations
|
49 |
emoji_map, emoji_variations = get_emoji_mapping()
|
50 |
|
51 |
+
# Regex pattern to match any token containing numbers
|
52 |
+
number_pattern = re.compile(r'.*\d+.*')
|
53 |
+
|
54 |
# Emojis to exclude
|
55 |
+
excluded_emojis = {
|
56 |
+
'πΆ',
|
57 |
+
'β',
|
58 |
+
'π·',
|
59 |
+
'πΉ',
|
60 |
+
'πΈ',
|
61 |
+
'πΊ',
|
62 |
+
'π»',
|
63 |
+
'π΄',
|
64 |
+
'π΅',
|
65 |
+
'πΌ',
|
66 |
+
'πΎ',
|
67 |
+
'π΅π¬',
|
68 |
+
'π',
|
69 |
+
'π²',
|
70 |
+
'β'
|
71 |
+
}
|
72 |
|
73 |
# Words to exclude from emoji conversion
|
74 |
+
excluded_words = {
|
75 |
+
'(',
|
76 |
+
')',
|
77 |
+
'purple',
|
78 |
+
'abdominal',
|
79 |
+
'penetration',
|
80 |
+
'feral',
|
81 |
+
'body',
|
82 |
+
'nude',
|
83 |
+
'anthro',
|
84 |
+
'big',
|
85 |
+
'small',
|
86 |
+
'the',
|
87 |
+
'a',
|
88 |
+
'an',
|
89 |
+
'and',
|
90 |
+
'or',
|
91 |
+
'but',
|
92 |
+
'if',
|
93 |
+
'then',
|
94 |
+
'because',
|
95 |
+
'as',
|
96 |
+
'until',
|
97 |
+
'while',
|
98 |
+
',',
|
99 |
+
'hi',
|
100 |
+
'res',
|
101 |
+
'pussy',
|
102 |
+
'penetrated',
|
103 |
+
'equine',
|
104 |
+
'felid',
|
105 |
+
'feline',
|
106 |
+
'equid',
|
107 |
+
'genital',
|
108 |
+
'genitals',
|
109 |
+
'penetrating',
|
110 |
+
'medial',
|
111 |
+
'ring',
|
112 |
+
'inside',
|
113 |
+
'duo',
|
114 |
+
'solo',
|
115 |
+
'in',
|
116 |
+
'hair',
|
117 |
+
'andromorph',
|
118 |
+
'from',
|
119 |
+
'behind',
|
120 |
+
'position',
|
121 |
+
'pantherine',
|
122 |
+
'animal',
|
123 |
+
'brown',
|
124 |
+
'sub',
|
125 |
+
'dom',
|
126 |
+
'explicit',
|
127 |
+
'black',
|
128 |
+
'bulge',
|
129 |
+
'dominant',
|
130 |
+
'kousen',
|
131 |
+
'rendan',
|
132 |
+
'genitalia',
|
133 |
+
'tan',
|
134 |
+
'simple',
|
135 |
+
'media',
|
136 |
+
'vaginal',
|
137 |
+
'red',
|
138 |
+
'pecs',
|
139 |
+
'navel',
|
140 |
+
'background',
|
141 |
+
'pubes',
|
142 |
+
}
|
143 |
|
144 |
# Track used emojis and their variations
|
145 |
used_emojis = set()
|
146 |
|
147 |
# Additional manual mappings for common words
|
148 |
custom_mappings = {
|
149 |
+
'markings': 'π·οΈ',
|
150 |
+
'sweat': 'π§',
|
151 |
+
'toes': 'π£',
|
152 |
+
'teeth': 'π¦·',
|
153 |
+
'fingering': 'π',
|
154 |
+
'blush': 'π',
|
155 |
+
'male': 'βοΈ',
|
156 |
+
'tiger': 'π―',
|
157 |
+
'fluids': 'π§',
|
158 |
+
'wolf': 'πΊ',
|
159 |
+
'dog': 'πΆ',
|
160 |
+
'female': 'βοΈ',
|
161 |
+
'intersex': 'β§οΈ',
|
162 |
+
'muscular': 'πͺ',
|
163 |
+
'wheelbarrow': 'π',
|
164 |
+
'sex': 'π',
|
165 |
+
'size': 'π',
|
166 |
+
'difference': 'π’',
|
167 |
+
'penis': 'π±',
|
168 |
+
'paws': 'πΎ',
|
169 |
+
'pawpads': 'πΎ',
|
170 |
+
'hindpaw': 'πΎ',
|
171 |
+
'fur': 'π§₯',
|
172 |
+
'horse': 'π΄',
|
173 |
+
#'pussy': '',
|
174 |
+
'ejaculation': 'π¦',
|
175 |
'cum': 'π¦',
|
176 |
'love': 'β€οΈ',
|
177 |
+
'smaller': 'π½',
|
178 |
+
'bigger': 'πΌ',
|
179 |
+
'larger': 'πΌ',
|
180 |
'cat': 'πΊ',
|
181 |
'cats': 'πΊ',
|
182 |
'dog': 'πΆ',
|
|
|
222 |
|
223 |
# Process each token
|
224 |
for token in tokens:
|
225 |
+
# Skip excluded words and anything containing numbers
|
226 |
+
if token in excluded_words or number_pattern.match(token):
|
|
|
227 |
continue
|
228 |
|
229 |
# First check custom mappings
|
|
|
233 |
found_emojis.append(emoji)
|
234 |
used_emojis.add(emoji)
|
235 |
explanations.append(f"'{token}' β {emoji} (custom mapping)")
|
236 |
+
#else:
|
237 |
+
# explanations.append(f"'{token}' β (skipped - emoji {emoji} already used)")
|
238 |
continue
|
239 |
|
240 |
# Then check emoji mapping
|
|
|
249 |
break
|
250 |
if not found_match:
|
251 |
available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
|
252 |
+
#if available_emojis:
|
253 |
+
# explanations.append(f"'{token}' β (skipped - all matching emojis {', '.join(available_emojis)} already used)")
|
254 |
+
#else:
|
255 |
+
# explanations.append(f"'{token}' β (skipped - all matching emojis are excluded)")
|
256 |
else:
|
257 |
explanations.append(f"'{token}' β (no matching emoji found)")
|
258 |
|