k4d3 committed on
Commit
3f04019
•
1 Parent(s): c057738

way too much effort for emojis

Browse files
Files changed (1) hide show
  1. txt2emoji +127 -11
txt2emoji CHANGED
@@ -18,6 +18,7 @@ from nltk.tokenize import word_tokenize
18
  from emoji import EMOJI_DATA
19
  import argparse
20
  from pathlib import Path
 
21
 
22
  # Download required NLTK data (only needed once)
23
  nltk.download('punkt', quiet=True)
@@ -47,19 +48,135 @@ def text_to_emojis(text):
47
  # Create emoji mapping and variations
48
  emoji_map, emoji_variations = get_emoji_mapping()
49
 
 
 
 
50
  # Emojis to exclude
51
- excluded_emojis = {'πŸ”Ά', 'β­•', 'πŸ”·', 'πŸ”Ή', 'πŸ”Έ', 'πŸ”Ί', 'πŸ”»', 'πŸ”΄', 'πŸ”΅', 'πŸ”Ό', 'πŸ”½', 'πŸ”Ύ', 'πŸ‡΅πŸ‡¬', 'πŸ€„', 'πŸ”²', 'βœ…'}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
52
 
53
  # Words to exclude from emoji conversion
54
- excluded_words = {'big', 'small', 'the', 'a', 'an', 'and', 'or', 'but', 'if', 'then', 'because', 'as', 'until', 'while', ','}
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
55
 
56
  # Track used emojis and their variations
57
  used_emojis = set()
58
 
59
  # Additional manual mappings for common words
60
  custom_mappings = {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
61
  'cum': 'πŸ’¦',
62
  'love': '❀️',
 
 
 
63
  'cat': '😺',
64
  'cats': '😺',
65
  'dog': '🐢',
@@ -105,9 +222,8 @@ def text_to_emojis(text):
105
 
106
  # Process each token
107
  for token in tokens:
108
- # Skip excluded words
109
- if token in excluded_words:
110
- explanations.append(f"'{token}' β†’ (skipped - excluded word)")
111
  continue
112
 
113
  # First check custom mappings
@@ -117,8 +233,8 @@ def text_to_emojis(text):
117
  found_emojis.append(emoji)
118
  used_emojis.add(emoji)
119
  explanations.append(f"'{token}' β†’ {emoji} (custom mapping)")
120
- else:
121
- explanations.append(f"'{token}' β†’ (skipped - emoji {emoji} already used)")
122
  continue
123
 
124
  # Then check emoji mapping
@@ -133,10 +249,10 @@ def text_to_emojis(text):
133
  break
134
  if not found_match:
135
  available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
136
- if available_emojis:
137
- explanations.append(f"'{token}' β†’ (skipped - all matching emojis {', '.join(available_emojis)} already used)")
138
- else:
139
- explanations.append(f"'{token}' β†’ (skipped - all matching emojis are excluded)")
140
  else:
141
  explanations.append(f"'{token}' β†’ (no matching emoji found)")
142
 
 
18
  from emoji import EMOJI_DATA
19
  import argparse
20
  from pathlib import Path
21
+ import re
22
 
23
  # Download required NLTK data (only needed once)
24
  nltk.download('punkt', quiet=True)
 
48
  # Create emoji mapping and variations
49
  emoji_map, emoji_variations = get_emoji_mapping()
50
 
51
+ # Regex pattern to match any token containing numbers
52
+ number_pattern = re.compile(r'.*\d+.*')
53
+
54
  # Emojis to exclude
55
+ excluded_emojis = {
56
+ 'πŸ”Ά',
57
+ 'β­•',
58
+ 'πŸ”·',
59
+ 'πŸ”Ή',
60
+ 'πŸ”Έ',
61
+ 'πŸ”Ί',
62
+ 'πŸ”»',
63
+ 'πŸ”΄',
64
+ 'πŸ”΅',
65
+ 'πŸ”Ό',
66
+ 'πŸ”Ύ',
67
+ 'πŸ‡΅πŸ‡¬',
68
+ 'πŸ€„',
69
+ 'πŸ”²',
70
+ 'βœ…'
71
+ }
72
 
73
  # Words to exclude from emoji conversion
74
+ excluded_words = {
75
+ '(',
76
+ ')',
77
+ 'purple',
78
+ 'abdominal',
79
+ 'penetration',
80
+ 'feral',
81
+ 'body',
82
+ 'nude',
83
+ 'anthro',
84
+ 'big',
85
+ 'small',
86
+ 'the',
87
+ 'a',
88
+ 'an',
89
+ 'and',
90
+ 'or',
91
+ 'but',
92
+ 'if',
93
+ 'then',
94
+ 'because',
95
+ 'as',
96
+ 'until',
97
+ 'while',
98
+ ',',
99
+ 'hi',
100
+ 'res',
101
+ 'pussy'
102
+ 'penetrated',
103
+ 'equine',
104
+ 'felid',
105
+ 'feline',
106
+ 'equid',
107
+ 'genital',
108
+ 'genitals',
109
+ 'penetrating',
110
+ 'medial',
111
+ 'ring',
112
+ 'inside',
113
+ 'duo',
114
+ 'solo',
115
+ 'in',
116
+ 'hair',
117
+ 'andromorph',
118
+ 'from',
119
+ 'behind',
120
+ 'position',
121
+ 'pantherine',
122
+ 'animal',
123
+ 'brown',
124
+ 'sub',
125
+ 'dom',
126
+ 'explicit',
127
+ 'black',
128
+ 'bulge',
129
+ 'dominant',
130
+ 'kousen',
131
+ 'rendan',
132
+ 'genitalia',
133
+ 'tan',
134
+ 'simple',
135
+ 'media',
136
+ 'vaginal',
137
+ 'red',
138
+ 'pecs',
139
+ 'navel',
140
+ 'background',
141
+ 'pubes',
142
+ }
143
 
144
  # Track used emojis and their variations
145
  used_emojis = set()
146
 
147
  # Additional manual mappings for common words
148
  custom_mappings = {
149
+ 'markings': '🏷️',
150
+ 'sweat': 'πŸ’§',
151
+ 'toes': 'πŸ‘£',
152
+ 'teeth': '🦷',
153
+ 'fingering': 'πŸ‘‰',
154
+ 'blush': '😊',
155
+ 'male': '♂️',
156
+ 'tiger': '🐯',
157
+ 'fluids': 'πŸ’§',
158
+ 'wolf': '🐺',
159
+ 'dog': '🐢',
160
+ 'female': '♀️',
161
+ 'intersex': '⚧️',
162
+ 'muscular': 'πŸ’ͺ',
163
+ 'wheelbarrow': '🚜',
164
+ 'sex': 'πŸ’‘',
165
+ 'size': 'πŸ“',
166
+ 'difference': 'πŸ”’',
167
+ 'penis': 'πŸ”±',
168
+ 'paws': '🐾',
169
+ 'pawpads': '🐾',
170
+ 'hindpaw': '🐾',
171
+ 'fur': 'πŸ§₯',
172
+ 'horse': '🐴',
173
+ #'pussy': '',
174
+ 'ejaculation': 'πŸ’¦',
175
  'cum': 'πŸ’¦',
176
  'love': '❀️',
177
+ 'smaller': 'πŸ”½',
178
+ 'bigger': 'πŸ”Ό',
179
+ 'larger': 'πŸ”Ό',
180
  'cat': '😺',
181
  'cats': '😺',
182
  'dog': '🐢',
 
222
 
223
  # Process each token
224
  for token in tokens:
225
+ # Skip excluded words and anything containing numbers
226
+ if token in excluded_words or number_pattern.match(token):
 
227
  continue
228
 
229
  # First check custom mappings
 
233
  found_emojis.append(emoji)
234
  used_emojis.add(emoji)
235
  explanations.append(f"'{token}' β†’ {emoji} (custom mapping)")
236
+ #else:
237
+ # explanations.append(f"'{token}' β†’ (skipped - emoji {emoji} already used)")
238
  continue
239
 
240
  # Then check emoji mapping
 
249
  break
250
  if not found_match:
251
  available_emojis = [e for e in emoji_map[token] if e not in excluded_emojis]
252
+ #if available_emojis:
253
+ # explanations.append(f"'{token}' β†’ (skipped - all matching emojis {', '.join(available_emojis)} already used)")
254
+ #else:
255
+ # explanations.append(f"'{token}' β†’ (skipped - all matching emojis are excluded)")
256
  else:
257
  explanations.append(f"'{token}' β†’ (no matching emoji found)")
258