awoo
Browse filesSigned-off-by: Balazs Horvath <[email protected]>
dataset_tools/e621 JSON to txt.ipynb
CHANGED
@@ -22,16 +22,13 @@
|
|
22 |
},
|
23 |
{
|
24 |
"cell_type": "code",
|
25 |
-
"execution_count":
|
26 |
"metadata": {},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
"# Define tags to be ignored using regular expressions for exact matching\n",
|
30 |
"ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
|
31 |
-
" r\"
|
32 |
-
" r\"\\b2005\\b\", r\"\\b2006\\b\", r\"\\b2007\\b\", r\"\\b2008\\b\", r\"\\b2009\\b\", r\"\\b2010\\b\", r\"\\b2011\\b\",\n",
|
33 |
-
" r\"\\b2012\\b\", r\"\\b2013\\b\", r\"\\b2014\\b\", r\"\\b2015\\b\", r\"\\b2016\\b\", r\"\\b2017\\b\", r\"\\b2018\\b\",\n",
|
34 |
-
" r\"\\b2019\\b\", r\"\\b2020\\b\", r\"\\b2021\\b\", r\"\\b2022\\b\", r\"\\b2023\\b\", r\"\\b2024\\b\",\n",
|
35 |
" r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
|
36 |
" r\"\\babsurd res\\b\", r\"\\bhi res\\b\", r\"\\bshaded\\b\", r\"\\bdetailed\\b\",\n",
|
37 |
" r\"\\btagme\\b\",\n",
|
@@ -50,16 +47,17 @@
|
|
50 |
" r\"\\bmammal\\b\", r\"\\bcanis\\b\", r\"\\bcanine\\b\", r\"\\bcanid\\b\",\n",
|
51 |
" r\"\\bdigimon\\b\", r\"\\bbandai namco\\b\",\n",
|
52 |
" r\"\\bpokemon \\(species\\)\\b\", r\"\\bpokemon\\b\", r\"\\bnintendo\\b\",\n",
|
53 |
-
" r\"\\b5:4\\b\", r\"\\b5:3\\b\", r\"\\b4:5\\b\", r\"\\b1:1\\b\", r\"\\b5:6\\b\", r\"\\b6:5\\b\", r\"\\b16:9\\b\", r\"\\b3:4\\b\",\n",
|
54 |
" r\"\\\\bby conditional dnp\\\\b\",\n",
|
55 |
" r\"\\\\bconditional dnp\\\\b\",\n",
|
56 |
" r\"\\\\bconditional_dnp\\\\b\",\n",
|
57 |
-
" r\"\\\\bby\\\\s+conditional\\\\s+dnp\\\\b\"
|
|
|
|
|
58 |
]
|
59 |
},
|
60 |
{
|
61 |
"cell_type": "code",
|
62 |
-
"execution_count":
|
63 |
"metadata": {},
|
64 |
"outputs": [
|
65 |
{
|
@@ -1458,8 +1456,7 @@
|
|
1458 |
"β fur, cervine genitalia, cervine penis, duo, erection, eyes closed, fur, genitals, grey body, grey fur, β\n",
|
1459 |
"β interspecies, knotting, lying, male, male/male, male penetrated, male penetrating, male penetrating male, β\n",
|
1460 |
"β nipples, nude, on back, penetration, penile, penile penetration, penis, penis in ass, sex, table lotus β\n",
|
1461 |
-
"β position, by peregrine pegs, beastars, legoshi \\(beastars\\), louis \\(beastars\\), cervine, deer, red deer, wolf
|
1462 |
-
"β 3:2 β\n",
|
1463 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
1464 |
"</pre>\n"
|
1465 |
],
|
@@ -1472,8 +1469,7 @@
|
|
1472 |
"β fur, cervine genitalia, cervine penis, duo, erection, eyes closed, fur, genitals, grey body, grey fur, β\n",
|
1473 |
"β interspecies, knotting, lying, male, male/male, male penetrated, male penetrating, male penetrating male, β\n",
|
1474 |
"β nipples, nude, on back, penetration, penile, penile penetration, penis, penis in ass, sex, table lotus β\n",
|
1475 |
-
"β position, by peregrine pegs, beastars, legoshi \\(beastars\\), louis \\(beastars\\), cervine, deer, red deer, wolf
|
1476 |
-
"β 3:2 β\n",
|
1477 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
1478 |
]
|
1479 |
},
|
@@ -3130,7 +3126,7 @@
|
|
3130 |
"β penetration, penile, penile penetration, penis, penis in ass, plant, quadruped, ranged weapon, rifle, saliva, β\n",
|
3131 |
"β scut tail, sex, sheath, short tail, standing, standing sex, tail, teeth, thrusting, tongue, tongue out, tree, β\n",
|
3132 |
"β weapon, by backlash91, by conditional dnp, bambi \\(film\\), the great prince of the forest, cervine, deer, β\n",
|
3133 |
-
"β
|
3134 |
"βββββββββββββοΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
3135 |
"</pre>\n"
|
3136 |
],
|
@@ -3149,7 +3145,7 @@
|
|
3149 |
"β penetration, penile, penile penetration, penis, penis in ass, plant, quadruped, ranged weapon, rifle, saliva, β\n",
|
3150 |
"β scut tail, sex, sheath, short tail, standing, standing sex, tail, teeth, thrusting, tongue, tongue out, tree, β\n",
|
3151 |
"β weapon, by backlash91, by conditional dnp, bambi \\(film\\), the great prince of the forest, cervine, deer, β\n",
|
3152 |
-
"β
|
3153 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
3154 |
]
|
3155 |
},
|
@@ -3371,7 +3367,7 @@
|
|
3371 |
"β piercing, plant, predator/prey, restraints, rope, rope bondage, sex, sheath, side view, size difference, β\n",
|
3372 |
"β smaller male, smile, spread legs, spreading, standing, submissive, submissive male, tree, tree bondage, wrists β\n",
|
3373 |
"β together, by conditional dnp, by xenoforge, ruska, tanner james, cervine, cross fox, deer, elk, fox, red fox, β\n",
|
3374 |
-
"β
|
3375 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
3376 |
"</pre>\n"
|
3377 |
],
|
@@ -3387,7 +3383,7 @@
|
|
3387 |
"β piercing, plant, predator/prey, restraints, rope, rope bondage, sex, sheath, side view, size difference, β\n",
|
3388 |
"β smaller male, smile, spread legs, spreading, standing, submissive, submissive male, tree, tree bondage, wrists β\n",
|
3389 |
"β together, by conditional dnp, by xenoforge, ruska, tanner james, cervine, cross fox, deer, elk, fox, red fox, β\n",
|
3390 |
-
"β
|
3391 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
3392 |
]
|
3393 |
},
|
|
|
22 |
},
|
23 |
{
|
24 |
"cell_type": "code",
|
25 |
+
"execution_count": 57,
|
26 |
"metadata": {},
|
27 |
"outputs": [],
|
28 |
"source": [
|
29 |
"# Define tags to be ignored using regular expressions for exact matching\n",
|
30 |
"ignored_tags = [r\"\\bblizzard entertainment\\b\", r\"\\bwarcraft\\b\",\n",
|
31 |
+
" r\"(?:\\d{4})|(?:\\d+:\\d+)\",\n",
|
|
|
|
|
|
|
32 |
" r\"\\bdetailed\\b\", r\"\\bwidescreen\\b\", r\"\\b4k\\b\",\n",
|
33 |
" r\"\\babsurd res\\b\", r\"\\bhi res\\b\", r\"\\bshaded\\b\", r\"\\bdetailed\\b\",\n",
|
34 |
" r\"\\btagme\\b\",\n",
|
|
|
47 |
" r\"\\bmammal\\b\", r\"\\bcanis\\b\", r\"\\bcanine\\b\", r\"\\bcanid\\b\",\n",
|
48 |
" r\"\\bdigimon\\b\", r\"\\bbandai namco\\b\",\n",
|
49 |
" r\"\\bpokemon \\(species\\)\\b\", r\"\\bpokemon\\b\", r\"\\bnintendo\\b\",\n",
|
|
|
50 |
" r\"\\\\bby conditional dnp\\\\b\",\n",
|
51 |
" r\"\\\\bconditional dnp\\\\b\",\n",
|
52 |
" r\"\\\\bconditional_dnp\\\\b\",\n",
|
53 |
+
" r\"\\\\bby\\\\s+conditional\\\\s+dnp\\\\b\",\n",
|
54 |
+
" r\"\\\\bgeneration\\s+\\d+\\s+pokemon\\\\b\",\n",
|
55 |
+
"]"
|
56 |
]
|
57 |
},
|
58 |
{
|
59 |
"cell_type": "code",
|
60 |
+
"execution_count": 58,
|
61 |
"metadata": {},
|
62 |
"outputs": [
|
63 |
{
|
|
|
1456 |
"β fur, cervine genitalia, cervine penis, duo, erection, eyes closed, fur, genitals, grey body, grey fur, β\n",
|
1457 |
"β interspecies, knotting, lying, male, male/male, male penetrated, male penetrating, male penetrating male, β\n",
|
1458 |
"β nipples, nude, on back, penetration, penile, penile penetration, penis, penis in ass, sex, table lotus β\n",
|
1459 |
+
"β position, by peregrine pegs, beastars, legoshi \\(beastars\\), louis \\(beastars\\), cervine, deer, red deer, wolf β\n",
|
|
|
1460 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
1461 |
"</pre>\n"
|
1462 |
],
|
|
|
1469 |
"β fur, cervine genitalia, cervine penis, duo, erection, eyes closed, fur, genitals, grey body, grey fur, β\n",
|
1470 |
"β interspecies, knotting, lying, male, male/male, male penetrated, male penetrating, male penetrating male, β\n",
|
1471 |
"β nipples, nude, on back, penetration, penile, penile penetration, penis, penis in ass, sex, table lotus β\n",
|
1472 |
+
"β position, by peregrine pegs, beastars, legoshi \\(beastars\\), louis \\(beastars\\), cervine, deer, red deer, wolf β\n",
|
|
|
1473 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
1474 |
]
|
1475 |
},
|
|
|
3126 |
"β penetration, penile, penile penetration, penis, penis in ass, plant, quadruped, ranged weapon, rifle, saliva, β\n",
|
3127 |
"β scut tail, sex, sheath, short tail, standing, standing sex, tail, teeth, thrusting, tongue, tongue out, tree, β\n",
|
3128 |
"β weapon, by backlash91, by conditional dnp, bambi \\(film\\), the great prince of the forest, cervine, deer, β\n",
|
3129 |
+
"β human, artist name β\n",
|
3130 |
"βββββββββββββοΏ½οΏ½βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
3131 |
"</pre>\n"
|
3132 |
],
|
|
|
3145 |
"β penetration, penile, penile penetration, penis, penis in ass, plant, quadruped, ranged weapon, rifle, saliva, β\n",
|
3146 |
"β scut tail, sex, sheath, short tail, standing, standing sex, tail, teeth, thrusting, tongue, tongue out, tree, β\n",
|
3147 |
"β weapon, by backlash91, by conditional dnp, bambi \\(film\\), the great prince of the forest, cervine, deer, β\n",
|
3148 |
+
"β human, artist name β\n",
|
3149 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
3150 |
]
|
3151 |
},
|
|
|
3367 |
"β piercing, plant, predator/prey, restraints, rope, rope bondage, sex, sheath, side view, size difference, β\n",
|
3368 |
"β smaller male, smile, spread legs, spreading, standing, submissive, submissive male, tree, tree bondage, wrists β\n",
|
3369 |
"β together, by conditional dnp, by xenoforge, ruska, tanner james, cervine, cross fox, deer, elk, fox, red fox, β\n",
|
3370 |
+
"β true fox β\n",
|
3371 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n",
|
3372 |
"</pre>\n"
|
3373 |
],
|
|
|
3383 |
"β piercing, plant, predator/prey, restraints, rope, rope bondage, sex, sheath, side view, size difference, β\n",
|
3384 |
"β smaller male, smile, spread legs, spreading, standing, submissive, submissive male, tree, tree bondage, wrists β\n",
|
3385 |
"β together, by conditional dnp, by xenoforge, ruska, tanner james, cervine, cross fox, deer, elk, fox, red fox, β\n",
|
3386 |
+
"β true fox β\n",
|
3387 |
"βββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββββ\n"
|
3388 |
]
|
3389 |
},
|