File size: 31,938 Bytes
00b2f69 0810c86 fc2322a 7ab3132 fc2322a 85d0442 fc2322a 250741a 7c5072b 85d0442 250741a 85d0442 fc2322a 0810c86 00b2f69 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 0c4f2f7 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 a9f2ca0 0810c86 0c4f2f7 00b2f69 fc2322a 7c5072b 00b2f69 7c5072b 00b2f69 7c5072b 7ab3132 00b2f69 250741a 7ab3132 00b2f69 fc2322a 00b2f69 fc2322a a9f2ca0 fc2322a 00b2f69 fc2322a 00b2f69 7c5072b 00b2f69 7c5072b 00b2f69 fc2322a 00b2f69 7c5072b 00b2f69 63be7a2 a9f2ca0 00b2f69 250741a a9f2ca0 250741a a9f2ca0 250741a bbfc683 250741a bbfc683 250741a 63be7a2 bbfc683 250741a 00b2f69 a9f2ca0 00b2f69 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336 337 338 339 340 341 342 343 344 345 346 347 348 349 350 351 352 353 354 355 356 357 358 359 360 361 362 363 364 365 366 367 368 369 370 371 372 373 374 375 376 377 378 379 380 381 382 383 384 385 386 387 388 389 390 391 392 393 394 395 396 397 398 399 400 401 402 403 404 405 406 407 408 409 410 411 412 413 414 415 416 417 418 419 420 421 422 423 424 425 426 427 428 429 430 431 432 433 434 435 436 437 438 439 440 |
{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## e621.net/e6ai.net JSON to `.txt` tags\n",
"----\n",
"\n",
"This Python script is designed to process JSON files found within a specified directory and its subdirectories. Each JSON file is expected to contain data related to image posts, particularly sourced from online platforms such as image boards. The script parses these JSON files, extracts relevant information such as image URL, ratings, and tags, and generates caption files (`.txt`) based on this data.\n",
"\n",
"Here's a breakdown of what the script does:\n",
"\n",
"1. **Ignoring Tags**: The script defines a list of regular-expression patterns for tags to be dropped during processing, such as \"hi res\" and \"shaded\".\n",
"\n",
"2. **Processing Files**: The `process_file` function is responsible for processing each JSON file. It reads the JSON data, extracts the URL of the image file, determines its rating, and extracts tags associated with the image.\n",
"\n",
"3. **Generating Caption File**: For each image, a caption file is generated with the same name as the image file but with a .txt extension. The rating of the image is written first, followed by processed tags.\n",
"\n",
"4. **Processing Tags**: Tags are processed to replace underscores with spaces and to handle special cases such as artist tags. Ignored tags are filtered out."
]
},
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [],
"source": [
"\"\"\"\n",
"Define tags to be ignored using regular expressions for exact matching\n",
"\"\"\"\n",
"# Every pattern describes one whole tag. A word separator is written as [ _] so\n",
"# that a pattern matches both the raw (hi_res) and the cleaned (hi res) spelling\n",
"# of a tag, and keeps working when compiled with re.VERBOSE, which discards bare\n",
"# spaces in a pattern but not the contents of a character class.\n",
"ignored_tags = [\n",
"    r\"\\bblizzard[ _]entertainment\\b\", r\"\\bwarcraft\\b\",\n",
"    # Years (\"2023\") and aspect ratios (\"16:9\"), but not digits inside a name.\n",
"    r\"\\b(?:\\d{4}|\\d+:\\d+)\\b\",\n",
"    r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
"    r\"\\babsurd[ _]res\\b\", r\"\\bhi[ _]res\\b\", r\"\\bshaded\\b\",\n",
"    r\"\\btagme\\b\",\n",
"    r\"\\bdota\\b\",\n",
"    r\"\\bcreative[ _]commons\\b\", r\"\\bcc-by-nc-nd\\b\",\n",
"    r\"\\bsquare[ _]enix\\b\", r\"\\bfinal[ _]fantasy[ _]xiv\\b\", r\"\\bfinal[ _]fantasy\\b\",\n",
"    r\"\\bmythological[ _]canine\\b\", r\"\\basian[ _]mythology\\b\", r\"\\bmythological[ _]scalie\\b\",\n",
"    r\"\\bancient[ _]pokemon\\b\", r\"\\bmythological[ _]creature\\b\", r\"\\blegendary[ _]pokemon\\b\",\n",
"    r\"\\bfelis\\b\", r\"\\bfelid\\b\",\n",
"    r\"\\bsega\\b\",\n",
"    r\"\\bhasbro\\b\",\n",
"    r\"\\bzootopia\\b\",\n",
"    r\"\\bfive[ _]nights[ _]at[ _]freddy's\\b\",\n",
"    r\"\\beeveelution\\b\",\n",
"    r\"\\bdisney\\b\",\n",
"    r\"\\bmammal\\b\", r\"\\bcanis\\b\", r\"\\bcanine\\b\", r\"\\bcanid\\b\",\n",
"    r\"\\bdigimon\\b\", r\"\\bbandai[ _]namco\\b\",\n",
"    # The parentheses are literal and may already be backslash-escaped in a\n",
"    # cleaned tag, hence the optional backslash in front of each of them.\n",
"    r\"\\bpokemon[ _]\\\\?\\(species\\\\?\\)\",\n",
"    r\"\\bpal[ _]\\\\?\\(species\\\\?\\)\",\n",
"    r\"\\bpokemon\\b\", r\"\\bnintendo\\b\",\n",
"    # Pseudo-artist tag, with or without the \"by \" prefix added to artist tags.\n",
"    r\"\\b(?:by[ _]+)?conditional[ _]+dnp\\b\",\n",
"    r\"\\bgeneration[ _]+\\d+[ _]+pokemon\\b\",\n",
"]"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\package-lock.json</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\package-lock.json\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\package.json</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\package.json\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\2_anthro_weasel\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3051834.j</span><span style=\"font-weight: bold\">son</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\2_anthro_weasel\\\u001b[0m\u001b[1;36m3051834.j\u001b[0m\u001b[1mson\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> E:\\training_dir_staging\\2_anthro_weasel\\8a5ed4f684745414e5e57b2e3c9276ff.txt </span>β\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, anthro, balls, bandanna, blush, brown body, brown fur, chastity cage, chastity device, clothing, β\n",
"β dialogue, dildo, exclamation point, feet, fur, genitals, hair, hindpaw, kerchief, legs up, lying, male, male β\n",
"β wearing strapon, masturbation, mostly nude, offscreen character, on back, pawpads, paws, penetrable sex toy, β\n",
"β penis, pink background, pouting, question mark, sex toy, simple background, solo, speech bubble, strapon, β\n",
"β strapon masturbation, strapon over chastity, strapon over penis, tan body, tan fur, teasing, text, toes, β\n",
"β underwear, white hair, by bittenbun, merlin \\(lllmaddy\\), digital media \\(artwork\\), english text β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"</pre>\n"
],
"text/plain": [
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β\u001b[1;35m \u001b[0m\u001b[1;35m E:\\training_dir_staging\\2_anthro_weasel\\8a5ed4f684745414e5e57b2e3c9276ff.txt \u001b[0m\u001b[1;35m \u001b[0mβ\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, anthro, balls, bandanna, blush, brown body, brown fur, chastity cage, chastity device, clothing, β\n",
"β dialogue, dildo, exclamation point, feet, fur, genitals, hair, hindpaw, kerchief, legs up, lying, male, male β\n",
"β wearing strapon, masturbation, mostly nude, offscreen character, on back, pawpads, paws, penetrable sex toy, β\n",
"β penis, pink background, pouting, question mark, sex toy, simple background, solo, speech bubble, strapon, β\n",
"β strapon masturbation, strapon over chastity, strapon over penis, tan body, tan fur, teasing, text, toes, β\n",
"β underwear, white hair, by bittenbun, merlin \\(lllmaddy\\), digital media \\(artwork\\), english text β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\2_anthro_weasel\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3584444.j</span><span style=\"font-weight: bold\">son</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\2_anthro_weasel\\\u001b[0m\u001b[1;36m3584444.j\u001b[0m\u001b[1mson\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> E:\\training_dir_staging\\2_anthro_weasel\\ecff451612ee44a571db893d44c910ce.txt </span>β\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β anthro, anus, athletic, athletic anthro, athletic male, backsack, balls, big penis, butt, chandelier, claws, β\n",
"β crouching, erection, eyebrows, fur, genitals, hair, humanoid genitalia, humanoid penis, imminent facesitting, β\n",
"β indoor nudity, inside, licking, licking lips, licking own lips, looking at viewer, looking down, looking down β\n",
"β at viewer, low-angle view, male, nipples, nude, nude anthro, nude male, open mouth, open smile, pawpads, pecs, β\n",
"β penis, perineum, presenting, presenting anus, presenting hindquarters, presenting penis, self lick, smile, β\n",
"β solo, teeth, thick thighs, tongue, tongue out, tuft, worm's-eye view, by kihu, barnaby kane β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"</pre>\n"
],
"text/plain": [
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β\u001b[1;35m \u001b[0m\u001b[1;35m E:\\training_dir_staging\\2_anthro_weasel\\ecff451612ee44a571db893d44c910ce.txt \u001b[0m\u001b[1;35m \u001b[0mβ\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β anthro, anus, athletic, athletic anthro, athletic male, backsack, balls, big penis, butt, chandelier, claws, β\n",
"β crouching, erection, eyebrows, fur, genitals, hair, humanoid genitalia, humanoid penis, imminent facesitting, β\n",
"β indoor nudity, inside, licking, licking lips, licking own lips, looking at viewer, looking down, looking down β\n",
"β at viewer, low-angle view, male, nipples, nude, nude anthro, nude male, open mouth, open smile, pawpads, pecs, β\n",
"β penis, perineum, presenting, presenting anus, presenting hindquarters, presenting penis, self lick, smile, β\n",
"β solo, teeth, thick thighs, tongue, tongue out, tuft, worm's-eye view, by kihu, barnaby kane β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\2_anthro_weasel\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4264190.j</span><span style=\"font-weight: bold\">son</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\2_anthro_weasel\\\u001b[0m\u001b[1;36m4264190.j\u001b[0m\u001b[1mson\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> E:\\training_dir_staging\\2_anthro_weasel\\3394ff7098766b5d13ff9728f8178895.txt </span>β\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, after sex, anthro, balls, beach, beach towel, beach umbrella, bedroom eyes, being watched, bodily β\n",
"β fluids, butt, claws, cloud, crowd, cum, cum covered, cum in ass, cum inside, cum on anus, cum on belly, cum on β\n",
"β body, cum on butt, cum on chest, cum on face, cum on feet, cum on head, cum on leg, cum on paw, cum on pawpads, β\n",
"β cum on penis, cum on tail, erection, excessive cum, excessive genital fluids, exhibitionism, feet, fur, genital β\n",
"β fluids, genitals, group, half-closed eyes, humanoid genitalia, humanoid penis, inner ear fluff, legs up, β\n",
"β looking at another, looking at viewer, male, male anthro, messy, narrowed eyes, nude, orange body, orange fur, β\n",
"β outdoor nudity, outside, parasol, pawpads, paws, penis, pinup, pose, presenting, presenting balls, presenting β\n",
"β penis, public, public nudity, realistic penis size, sand, seaside, seductive, sky, smile, smiling at viewer, β\n",
"β solo, spread legs, spreading, tail, toe claws, toes, towel, tuft, whiskers, by oxfort2199 β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"</pre>\n"
],
"text/plain": [
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β\u001b[1;35m \u001b[0m\u001b[1;35m E:\\training_dir_staging\\2_anthro_weasel\\3394ff7098766b5d13ff9728f8178895.txt \u001b[0m\u001b[1;35m \u001b[0mβ\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, after sex, anthro, balls, beach, beach towel, beach umbrella, bedroom eyes, being watched, bodily β\n",
"β fluids, butt, claws, cloud, crowd, cum, cum covered, cum in ass, cum inside, cum on anus, cum on belly, cum on β\n",
"β body, cum on butt, cum on chest, cum on face, cum on feet, cum on head, cum on leg, cum on paw, cum on pawpads, β\n",
"β cum on penis, cum on tail, erection, excessive cum, excessive genital fluids, exhibitionism, feet, fur, genital β\n",
"β fluids, genitals, group, half-closed eyes, humanoid genitalia, humanoid penis, inner ear fluff, legs up, β\n",
"β looking at another, looking at viewer, male, male anthro, messy, narrowed eyes, nude, orange body, orange fur, β\n",
"β outdoor nudity, outside, parasol, pawpads, paws, penis, pinup, pose, presenting, presenting balls, presenting β\n",
"β penis, public, public nudity, realistic penis size, sand, seaside, seductive, sky, smile, smiling at viewer, β\n",
"β solo, spread legs, spreading, tail, toe claws, toes, towel, tuft, whiskers, by oxfort2199 β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir_staging\\2_anthro_weasel\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">521244.j</span><span style=\"font-weight: bold\">son</span>\n",
"</pre>\n"
],
"text/plain": [
"Processing file: \u001b[1mE:\\training_dir_staging\\2_anthro_weasel\\\u001b[0m\u001b[1;36m521244.j\u001b[0m\u001b[1mson\u001b[0m\n"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β<span style=\"color: #800080; text-decoration-color: #800080; font-weight: bold\"> E:\\training_dir_staging\\2_anthro_weasel\\12e58e17f13c0e71cbe40059fb8cc169.txt </span>β\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, anal, anal penetration, animal genitalia, animal penis, anthro, anthro on anthro, balls, bodily fluids, β\n",
"β canine genitalia, canine penis, claws, cum, cum in ass, cum inside, digitigrade, duo, erection, eyes closed, β\n",
"β eyewear, feet, finger fetish, finger in mouth, finger play, fingers, fur, genital fluids, genitals, glasses, β\n",
"β hindpaw, humanoid genitalia, humanoid penis, knot, licking, licking lips, lying, male, male/male, male β\n",
"β penetrated, male penetrating, male penetrating male, nude, on back, pawpads, paws, penetration, penis, precum, β\n",
"β self lick, sex, slightly chubby, soles, spread legs, spreading, toes, tongue, tongue out, white balls, by β\n",
"β conditional dnp, by garnetto, arcshep, tre, tre \\(milligram smile\\) β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"</pre>\n"
],
"text/plain": [
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
"β\u001b[1;35m \u001b[0m\u001b[1;35m E:\\training_dir_staging\\2_anthro_weasel\\12e58e17f13c0e71cbe40059fb8cc169.txt \u001b[0m\u001b[1;35m \u001b[0mβ\n",
"β‘ββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ©\n",
"β 4 toes, anal, anal penetration, animal genitalia, animal penis, anthro, anthro on anthro, balls, bodily fluids, β\n",
"β canine genitalia, canine penis, claws, cum, cum in ass, cum inside, digitigrade, duo, erection, eyes closed, β\n",
"β eyewear, feet, finger fetish, finger in mouth, finger play, fingers, fur, genital fluids, genitals, glasses, β\n",
"β hindpaw, humanoid genitalia, humanoid penis, knot, licking, licking lips, lying, male, male/male, male β\n",
"β penetrated, male penetrating, male penetrating male, nude, on back, pawpads, paws, penetration, penis, precum, β\n",
"β self lick, sex, slightly chubby, soles, spread legs, spreading, toes, tongue, tongue out, white balls, by β\n",
"β conditional dnp, by garnetto, arcshep, tre, tre \\(milligram smile\\) β\n",
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
]
},
"metadata": {},
"output_type": "display_data"
}
],
"source": [
"import os\n",
"import glob\n",
"import re\n",
"import json\n",
"from rich.console import Console\n",
"from rich.table import Table\n",
"\n",
"console = Console()\n",
"\n",
"\n",
"def should_ignore_tag(tag, all_tags=None):\n",
"    \"\"\"\n",
"    Tell whether ``tag`` itself matches one of the ``ignored_tags`` patterns.\n",
"\n",
"    The tag is tested as given and in a normalised spelling (underscores turned\n",
"    into spaces, backslash-escaped parentheses unescaped), so it does not matter\n",
"    whether the caller passes the raw or the cleaned tag. A pattern has to match\n",
"    the whole tag, not merely a part of it.\n",
"\n",
"    ``all_tags`` is accepted only so that existing two-argument calls keep\n",
"    working. It is deliberately not consulted: testing the sibling tags made one\n",
"    ignored tag suppress every other tag of its category.\n",
"    \"\"\"\n",
"    normalised = tag.replace(\"_\", \" \").replace(\"\\\\(\", \"(\").replace(\"\\\\)\", \")\")\n",
"    candidates = {tag, normalised}\n",
"    for ignored_tag_pattern in ignored_tags:\n",
"        # re.VERBOSE is not used: it strips literal spaces from a pattern, which\n",
"        # kept multi-word patterns such as \"hi res\" from ever matching.\n",
"        pattern = re.compile(ignored_tag_pattern, re.IGNORECASE)\n",
"        if any(pattern.fullmatch(candidate) for candidate in candidates):\n",
"            return True\n",
"    return False\n",
"\n",
"\n",
"def process_tags(tags_dict, ignored_artist_tags=(\"conditional_dnp\",)):\n",
"    \"\"\"\n",
"    Flatten the per-category tag lists of a post into one list of caption tags.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    tags_dict : dict\n",
"        Maps a category name (for example ``\"artist\"``) to a list of raw tags.\n",
"    ignored_artist_tags : collection of str, optional\n",
"        Raw entries of the ``\"artist\"`` category that are not artist names and\n",
"        must not become a ``by ...`` tag.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        Cleaned tags in category order: underscores become spaces, parentheses\n",
"        are backslash-escaped, artist tags are rewritten as ``by <name>`` and\n",
"        every other tag accepted by ``should_ignore_tag`` is left out.\n",
"    \"\"\"\n",
"    processed_tags = []\n",
"    for category, tags_list in tags_dict.items():\n",
"        for raw_tag in tags_list or ():\n",
"            if not raw_tag:\n",
"                # An empty entry would only add a stray separator to the caption.\n",
"                continue\n",
"            if category == \"artist\":\n",
"                # Artist names are not run through the ignore patterns: a name\n",
"                # may legitimately contain digits or words those patterns target.\n",
"                if raw_tag.lower() in ignored_artist_tags:\n",
"                    continue\n",
"                name = raw_tag.replace(\"_\", \" \").replace(\" (artist)\", \"\")\n",
"                processed_tags.append(f\"by {name}\")\n",
"                continue\n",
"            tag = raw_tag.replace(\"_\", \" \")\n",
"            # Escape parentheses that are not escaped yet.\n",
"            tag = re.sub(r\"(?<!\\\\)\\(\", r\"\\(\", tag)\n",
"            tag = re.sub(r\"(?<!\\\\)\\)\", r\"\\)\", tag)\n",
"            if tag.lower() == \"artist\":\n",
"                continue\n",
"            # Pass only this tag as the second argument, never the whole\n",
"            # category, so that one ignored tag cannot take every other tag of\n",
"            # its category down with it.\n",
"            if not should_ignore_tag(tag, [raw_tag]):\n",
"                processed_tags.append(tag)\n",
"    return processed_tags\n",
"\n",
"def process_file(file_path):\n",
"    \"\"\"\n",
"    Turn one post JSON file into a caption ``.txt`` file stored next to it.\n",
"\n",
"    The JSON is read for a ``post`` object holding ``file.url``, ``rating`` and\n",
"    ``tags``. The caption file is named after the base name of ``file.url`` and\n",
"    contains the rating tag followed by the processed tags. A file without\n",
"    ``post.file.url`` produces no caption.\n",
"\n",
"    Any error is printed to the console instead of being raised, so one bad\n",
"    file does not stop a directory walk.\n",
"    \"\"\"\n",
"    try:\n",
"        console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",
"        # Read as UTF-8 explicitly; the platform default encoding may differ.\n",
"        with open(file_path, \"r\", encoding=\"utf-8\") as f:\n",
"            data = json.load(f)\n",
"        # Parse the URL and generate filename\n",
"        post_data = data.get(\"post\") or {}\n",
"        file_data = post_data.get(\"file\") or {}\n",
"        url = file_data.get(\"url\")\n",
"        if not url:\n",
"            return\n",
"        filename, _ = os.path.splitext(os.path.basename(url))\n",
"        caption_path = os.path.join(os.path.dirname(file_path), f\"{filename}.txt\")\n",
"\n",
"        # Anything that is not \"s\" or \"e\" counts as questionable.\n",
"        rating_tags = {\"s\": \"rating_safe\", \"e\": \"rating_explicit\"}\n",
"        rating_tag = rating_tags.get(post_data.get(\"rating\", \"q\"), \"rating_questionable\")\n",
"\n",
"        processed_tags = process_tags(post_data.get(\"tags\") or {})\n",
"        tags_line = \", \".join(processed_tags).strip()\n",
"\n",
"        # Build the whole caption before touching the output file, so a failure\n",
"        # while processing tags cannot leave a half-written caption behind, and\n",
"        # so no dangling separator is written when no tag survives.\n",
"        caption = f\"{rating_tag}, {tags_line}\" if tags_line else rating_tag\n",
"        with open(caption_path, \"w\", encoding=\"utf-8\") as f:\n",
"            f.write(caption)\n",
"\n",
"        if tags_line:\n",
"            # Show the written tags in a one-column table titled with the path.\n",
"            table = Table(show_header=True, header_style=\"bold magenta\")\n",
"            table.add_column(caption_path, justify=\"center\")\n",
"            table.add_row(tags_line)\n",
"            console.print(table)\n",
"    except Exception as e:\n",
"        console.print(f\"Error processing file: [bold]{file_path}[/bold]\")\n",
"        console.print(e)\n",
"\n",
"\n",
"def recursive_process(directory):\n",
"    \"\"\"\n",
"    Run ``process_file`` on every ``*.json`` file found under ``directory``.\n",
"\n",
"    The search is recursive and covers ``directory`` itself as well.\n",
"    \"\"\"\n",
"    json_pattern = directory + \"/**/*.json\"\n",
"    json_paths = glob.glob(json_pattern, recursive=True)\n",
"    for json_path in json_paths:\n",
"        process_file(json_path)\n",
"\n",
"\n",
"if __name__ == \"__main__\":\n",
"    # Default staging directory. Set the TRAINING_DIR environment variable to\n",
"    # process another tree (for example E:\\training_dir) without editing this cell.\n",
"    root_directory = os.environ.get(\"TRAINING_DIR\", r\"E:\\training_dir_staging\")\n",
"    recursive_process(root_directory)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Delete every JSON file ⚠️\n",
"\n",
"---"
]
},
{
"cell_type": "code",
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
"import glob\n",
"import os\n",
"\n",
"def delete_json_files(directory, dry_run=False):\n",
"    \"\"\"\n",
"    Delete every ``*.json`` file in ``directory`` and all of its subdirectories.\n",
"\n",
"    Parameters\n",
"    ----------\n",
"    directory : str\n",
"        Root of the tree to clean.\n",
"    dry_run : bool, optional\n",
"        When true nothing is removed; the matching files are only listed.\n",
"        Defaults to ``False``, in which case the files really are deleted.\n",
"\n",
"    Returns\n",
"    -------\n",
"    list of str\n",
"        The paths that were deleted or, for a dry run, that would be deleted.\n",
"        A path whose removal failed is reported and left out of the list.\n",
"    \"\"\"\n",
"    # Create the path pattern to match JSON files at any depth\n",
"    pattern = os.path.join(directory, '**', '*.json')\n",
"\n",
"    handled = []\n",
"    for file_path in glob.glob(pattern, recursive=True):\n",
"        if dry_run:\n",
"            print(f\"Would delete: {file_path}\")\n",
"            handled.append(file_path)\n",
"            continue\n",
"        try:\n",
"            os.remove(file_path)\n",
"        except OSError as e:\n",
"            print(f\"Error deleting {file_path}: {e}\")\n",
"        else:\n",
"            print(f\"Deleted: {file_path}\")\n",
"            handled.append(file_path)\n",
"    return handled\n",
"\n",
"if __name__ == \"__main__\":\n",
"    # Directory tree whose JSON files are removed. Set the TRAINING_DIR\n",
"    # environment variable to target another tree (for example E:\\training_dir).\n",
"    # NOTE(review): the caption cell above defaults to E:\\training_dir_staging,\n",
"    # not to this Desktop copy - confirm the two defaults are meant to differ.\n",
"    directory_path = os.environ.get(\n",
"        \"TRAINING_DIR\", r'C:\\Users\\kade\\Desktop\\training_dir_staging'\n",
"    )\n",
"\n",
"    # Call the function to delete JSON files recursively\n",
"    delete_json_files(directory_path)"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.12.3"
}
},
"nbformat": 4,
"nbformat_minor": 2
}
|