k4d3 committed on
Commit
b05b0bc
1 Parent(s): 050d08f

Signed-off-by: Balazs Horvath <[email protected]>

Files changed (2) hide show
  1. README.md +60 -0
  2. dataset_tools/e621 JSON to txt.ipynb +0 -535
README.md CHANGED
@@ -185,6 +185,66 @@ Now you are ready to right-click on each group and download the images.
185
 
186
  ### Manual Method
187
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
188
  ---
189
 
190
  ## LoRA Training Guide
 
185
 
186
  ### Manual Method
187
 
188
+ This method requires a browser extension like [ViolentMonkey](https://violentmonkey.github.io/) and the following UserScript:
189
+
190
+ <div style="background-color: lightyellow; padding: 10px;">
191
+ <details>
192
+ <summary>Click to reveal userscript.</summary>
193
+
194
+ ```js
195
+ // ==UserScript==
196
+ // @name e621 JSON Button
197
+ // @namespace https://cringe.live
198
+ // @version 1.0
199
+ // @description Adds a JSON button next to the download button on e621.net and e6ai.net
200
+ // @author _ka_de
201
+ // @match https://e621.net/*
202
+ // @match https://e6ai.net/*
203
+ // @grant none
204
+ // ==/UserScript==
205
+
206
+ (function() {
207
+ 'use strict';
208
+
209
/**
 * Build the URL of the JSON representation of the post currently being viewed.
 *
 * Reads `window.location.href` to find the post ID and
 * `window.location.hostname` to pick the site the JSON is requested from.
 *
 * @returns {string|null} `https://<hostname>/posts/<id>.json`, or `null` when
 *   the current URL is not an e621.net / e6ai.net `/posts/<id>` page. Callers
 *   should skip creating the link in that case.
 */
function constructJSONUrl() {
    // Get the current URL
    var currentUrl = window.location.href;
    // Extract the post ID from the URL. The userscript is matched against
    // every page of the site, so most URLs carry no post ID; String#match
    // returns null for those and must not be indexed blindly.
    var postMatch = currentUrl.match(/^https?:\/\/(?:e621\.net|e6ai\.net)\/posts\/(\d+)/);
    if (postMatch === null) {
        return null;
    }
    var postId = postMatch[1];
    // Construct the JSON URL on the same host the user is browsing
    var hostname = window.location.hostname;
    return 'https://' + hostname + '/posts/' + postId + '.json';
}
220
+
221
/**
 * Insert a "JSON" link into the `#image-extra-controls` container.
 *
 * The link points at the URL returned by `constructJSONUrl()` and is placed
 * directly after the container's first child element (the download button,
 * according to the original author's note). Does nothing when the container
 * is not present on the page or when no JSON URL can be built.
 *
 * @returns {void}
 */
function createJSONButton() {
    // Find the container where we want to insert the button. Look it up
    // first: pages without it have nowhere to put the link.
    var container = document.getElementById('image-extra-controls');
    if (!container) {
        return;
    }

    var jsonUrl = constructJSONUrl();
    if (!jsonUrl) {
        return;
    }

    // Create the link element and style it like the site's other buttons
    var jsonButton = document.createElement('a');
    jsonButton.setAttribute('class', 'button btn-info');
    // Set the JSON URL as the button's href attribute
    jsonButton.setAttribute('href', jsonUrl);
    // Static markup only; no untrusted data is interpolated here
    jsonButton.innerHTML = '<i class="fa-solid fa-angle-double-right"></i><span>JSON</span>';

    // Insert after the first child. With an empty container there is no first
    // child; a null reference node makes insertBefore append instead.
    var firstControl = container.children[0];
    container.insertBefore(jsonButton, firstControl ? firstControl.nextSibling : null);
}
237
+
238
+ // Run the function to create the JSON button
239
+ createJSONButton();
240
+ })();
241
+ ```
242
+
243
+ </details>
244
+ </div>
245
+
246
+ This will put a link to the JSON next to the download button on e621.net and e6ai.net. You can then use [this](https://huggingface.co/k4d3/yiff_toolkit/blob/main/dataset_tools/e621%20JSON%20to%20txt.ipynb) Python notebook to convert the downloaded JSON files to caption files. It uses the `rating_` prefix before `safe/questionable/explicit` because... you've guessed it, Pony! It also lets you ignore the tags you add to `ignored_tags` using the `r"\btag\b",` syntax; just replace `tag` with the tag you want it to skip.
247
+
248
  ---
249
 
250
  ## LoRA Training Guide
dataset_tools/e621 JSON to txt.ipynb CHANGED
@@ -3193,541 +3193,6 @@
3193
  " root_directory = r\"E:\\training_dir\\wickerbeast\"\n",
3194
  " recursive_process(root_directory)"
3195
  ]
3196
- },
3197
- {
3198
- "cell_type": "code",
3199
- "execution_count": 1,
3200
- "metadata": {},
3201
- "outputs": [
3202
- {
3203
- "name": "stderr",
3204
- "output_type": "stream",
3205
- "text": [
3206
- "<>:42: SyntaxWarning: invalid escape sequence '\\('\n",
3207
- "<>:42: SyntaxWarning: invalid escape sequence '\\('\n",
3208
- "C:\\Users\\kade\\AppData\\Local\\Temp\\ipykernel_6940\\2358354964.py:42: SyntaxWarning: invalid escape sequence '\\('\n",
3209
- " \"pokemon \\(species\\)\",\n"
3210
- ]
3211
- },
3212
- {
3213
- "data": {
3214
- "text/html": [
3215
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3911050.j</span><span style=\"font-weight: bold\">son</span>\n",
3216
- "</pre>\n"
3217
- ],
3218
- "text/plain": [
3219
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3911050.j\u001b[0m\u001b[1mson\u001b[0m\n"
3220
- ]
3221
- },
3222
- "metadata": {},
3223
- "output_type": "display_data"
3224
- },
3225
- {
3226
- "data": {
3227
- "text/html": [
3228
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\74f0deb5d828b5f4d4c55b3a2bb13a8b.txt</span>\n",
3229
- "</pre>\n"
3230
- ],
3231
- "text/plain": [
3232
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\74f0deb5d828b5f4d4c55b3a2bb13a8b.txt\u001b[0m\n"
3233
- ]
3234
- },
3235
- "metadata": {},
3236
- "output_type": "display_data"
3237
- },
3238
- {
3239
- "data": {
3240
- "text/html": [
3241
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">anthro, biped, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue thigh </span>\n",
3242
- "<span style=\"font-style: italic\">socks, clothing, electronics, footwear, girly, headphones, legwear, long socks, machine, male, male anthro, </span>\n",
3243
- "<span style=\"font-style: italic\">multicolored clothing, multicolored footwear, multicolored legwear, multicolored socks, multicolored thigh highs, </span>\n",
3244
- "<span style=\"font-style: italic\">multicolored thigh socks, neck tuft, pattern clothing, pattern footwear, pattern legwear, pattern socks, pattern </span>\n",
3245
- "<span style=\"font-style: italic\">thigh highs, pattern thigh socks, screen, simple background, socks, solo, striped clothing, striped footwear, </span>\n",
3246
- "<span style=\"font-style: italic\">striped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, thigh highs, thigh socks, tuft, </span>\n",
3247
- "<span style=\"font-style: italic\">two tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh highs, two tone thigh socks,</span>\n",
3248
- "<span style=\"font-style: italic\">white clothing, white footwear, white legwear, white socks, white thigh highs, white thigh socks, by pinkpoffinz, </span>\n",
3249
- "<span style=\"font-style: italic\">programming socks, voicemod, voicemod protogen, protogen, robot</span>\n",
3250
- "</pre>\n"
3251
- ],
3252
- "text/plain": [
3253
- "Writing tags: \u001b[3manthro, biped, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue thigh \u001b[0m\n",
3254
- "\u001b[3msocks, clothing, electronics, footwear, girly, headphones, legwear, long socks, machine, male, male anthro, \u001b[0m\n",
3255
- "\u001b[3mmulticolored clothing, multicolored footwear, multicolored legwear, multicolored socks, multicolored thigh highs, \u001b[0m\n",
3256
- "\u001b[3mmulticolored thigh socks, neck tuft, pattern clothing, pattern footwear, pattern legwear, pattern socks, pattern \u001b[0m\n",
3257
- "\u001b[3mthigh highs, pattern thigh socks, screen, simple background, socks, solo, striped clothing, striped footwear, \u001b[0m\n",
3258
- "\u001b[3mstriped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, thigh highs, thigh socks, tuft, \u001b[0m\n",
3259
- "\u001b[3mtwo tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh highs, two tone thigh socks,\u001b[0m\n",
3260
- "\u001b[3mwhite clothing, white footwear, white legwear, white socks, white thigh highs, white thigh socks, by pinkpoffinz, \u001b[0m\n",
3261
- "\u001b[3mprogramming socks, voicemod, voicemod protogen, protogen, robot\u001b[0m\n"
3262
- ]
3263
- },
3264
- "metadata": {},
3265
- "output_type": "display_data"
3266
- },
3267
- {
3268
- "data": {
3269
- "text/html": [
3270
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3921738.j</span><span style=\"font-weight: bold\">son</span>\n",
3271
- "</pre>\n"
3272
- ],
3273
- "text/plain": [
3274
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3921738.j\u001b[0m\u001b[1mson\u001b[0m\n"
3275
- ]
3276
- },
3277
- "metadata": {},
3278
- "output_type": "display_data"
3279
- },
3280
- {
3281
- "data": {
3282
- "text/html": [
3283
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\51ff893c703fc54cadff97b02935f7ab.txt</span>\n",
3284
- "</pre>\n"
3285
- ],
3286
- "text/plain": [
3287
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\51ff893c703fc54cadff97b02935f7ab.txt\u001b[0m\n"
3288
- ]
3289
- },
3290
- "metadata": {},
3291
- "output_type": "display_data"
3292
- },
3293
- {
3294
- "data": {
3295
- "text/html": [
3296
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">anthro, balls, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue thigh </span>\n",
3297
- "<span style=\"font-style: italic\">socks, chastity cage, chastity device, clothing, footwear, fur, genitals, glowing, glowing eyes, glowing genitalia,</span>\n",
3298
- "<span style=\"font-style: italic\">glowing penis, legwear, long socks, machine, male, multicolored clothing, multicolored footwear, multicolored </span>\n",
3299
- "<span style=\"font-style: italic\">legwear, multicolored socks, multicolored thigh highs, multicolored thigh socks, pattern clothing, pattern </span>\n",
3300
- "<span style=\"font-style: italic\">footwear, pattern legwear, pattern socks, pattern thigh highs, pattern thigh socks, penis, socks, solo, striped </span>\n",
3301
- "<span style=\"font-style: italic\">clothing, striped footwear, striped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, </span>\n",
3302
- "<span style=\"font-style: italic\">thigh highs, thigh socks, two tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh </span>\n",
3303
- "<span style=\"font-style: italic\">highs, two tone thigh socks, white clothing, white footwear, white legwear, white socks, white thigh highs, white </span>\n",
3304
- "<span style=\"font-style: italic\">thigh socks, by paul 0w0, programming socks, voicemod, voicemod protogen, protogen</span>\n",
3305
- "</pre>\n"
3306
- ],
3307
- "text/plain": [
3308
- "Writing tags: \u001b[3manthro, balls, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue thigh \u001b[0m\n",
3309
- "\u001b[3msocks, chastity cage, chastity device, clothing, footwear, fur, genitals, glowing, glowing eyes, glowing genitalia,\u001b[0m\n",
3310
- "\u001b[3mglowing penis, legwear, long socks, machine, male, multicolored clothing, multicolored footwear, multicolored \u001b[0m\n",
3311
- "\u001b[3mlegwear, multicolored socks, multicolored thigh highs, multicolored thigh socks, pattern clothing, pattern \u001b[0m\n",
3312
- "\u001b[3mfootwear, pattern legwear, pattern socks, pattern thigh highs, pattern thigh socks, penis, socks, solo, striped \u001b[0m\n",
3313
- "\u001b[3mclothing, striped footwear, striped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, \u001b[0m\n",
3314
- "\u001b[3mthigh highs, thigh socks, two tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh \u001b[0m\n",
3315
- "\u001b[3mhighs, two tone thigh socks, white clothing, white footwear, white legwear, white socks, white thigh highs, white \u001b[0m\n",
3316
- "\u001b[3mthigh socks, by paul 0w0, programming socks, voicemod, voicemod protogen, protogen\u001b[0m\n"
3317
- ]
3318
- },
3319
- "metadata": {},
3320
- "output_type": "display_data"
3321
- },
3322
- {
3323
- "data": {
3324
- "text/html": [
3325
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3922605.j</span><span style=\"font-weight: bold\">son</span>\n",
3326
- "</pre>\n"
3327
- ],
3328
- "text/plain": [
3329
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3922605.j\u001b[0m\u001b[1mson\u001b[0m\n"
3330
- ]
3331
- },
3332
- "metadata": {},
3333
- "output_type": "display_data"
3334
- },
3335
- {
3336
- "data": {
3337
- "text/html": [
3338
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\045c3a1219e5783df66217ca825884ca.txt</span>\n",
3339
- "</pre>\n"
3340
- ],
3341
- "text/plain": [
3342
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\045c3a1219e5783df66217ca825884ca.txt\u001b[0m\n"
3343
- ]
3344
- },
3345
- "metadata": {},
3346
- "output_type": "display_data"
3347
- },
3348
- {
3349
- "data": {
3350
- "text/html": [
3351
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">anthro, anthrofied, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue </span>\n",
3352
- "<span style=\"font-style: italic\">thigh socks, blush, clothing, electronics, fluffy, fluffy tail, footwear, fur, girly, group, headphones, legwear, </span>\n",
3353
- "<span style=\"font-style: italic\">long socks, machine, male, male/male, multicolored clothing, multicolored footwear, multicolored legwear, </span>\n",
3354
- "<span style=\"font-style: italic\">multicolored socks, multicolored thigh highs, multicolored thigh socks, one eye closed, pattern clothing, pattern </span>\n",
3355
- "<span style=\"font-style: italic\">footwear, pattern legwear, pattern socks, pattern thigh highs, pattern thigh socks, red clothing, red footwear, red</span>\n",
3356
- "<span style=\"font-style: italic\">legwear, red socks, red thigh highs, red thigh socks, simple background, socks, stockings, striped clothing, </span>\n",
3357
- "<span style=\"font-style: italic\">striped footwear, striped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, tail, thigh </span>\n",
3358
- "<span style=\"font-style: italic\">highs, thigh socks, trio, two tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh </span>\n",
3359
- "<span style=\"font-style: italic\">highs, two tone thigh socks, white clothing, white footwear, white legwear, white socks, white thigh highs, white </span>\n",
3360
- "<span style=\"font-style: italic\">thigh socks, wink, msi, mythology, opera \\</span><span style=\"font-weight: bold; font-style: italic\">(</span><span style=\"font-style: italic\">browser\\</span><span style=\"font-weight: bold; font-style: italic\">)</span><span style=\"font-style: italic\">, programming socks, voicemod, aura \\</span><span style=\"font-weight: bold; font-style: italic\">(</span><span style=\"font-style: italic\">operagx\\</span><span style=\"font-weight: bold; font-style: italic\">)</span><span style=\"font-style: italic\">, msi dragon </span>\n",
3361
- "<span style=\"font-style: italic\">lucky, voicemod protogen, domestic cat, dragon, feline, furred dragon, protogen, robot, scalie</span>\n",
3362
- "</pre>\n"
3363
- ],
3364
- "text/plain": [
3365
- "Writing tags: \u001b[3manthro, anthrofied, blue clothing, blue footwear, blue legwear, blue socks, blue thigh highs, blue \u001b[0m\n",
3366
- "\u001b[3mthigh socks, blush, clothing, electronics, fluffy, fluffy tail, footwear, fur, girly, group, headphones, legwear, \u001b[0m\n",
3367
- "\u001b[3mlong socks, machine, male, male/male, multicolored clothing, multicolored footwear, multicolored legwear, \u001b[0m\n",
3368
- "\u001b[3mmulticolored socks, multicolored thigh highs, multicolored thigh socks, one eye closed, pattern clothing, pattern \u001b[0m\n",
3369
- "\u001b[3mfootwear, pattern legwear, pattern socks, pattern thigh highs, pattern thigh socks, red clothing, red footwear, red\u001b[0m\n",
3370
- "\u001b[3mlegwear, red socks, red thigh highs, red thigh socks, simple background, socks, stockings, striped clothing, \u001b[0m\n",
3371
- "\u001b[3mstriped footwear, striped legwear, striped socks, striped thigh highs, striped thigh socks, stripes, tail, thigh \u001b[0m\n",
3372
- "\u001b[3mhighs, thigh socks, trio, two tone clothing, two tone footwear, two tone legwear, two tone socks, two tone thigh \u001b[0m\n",
3373
- "\u001b[3mhighs, two tone thigh socks, white clothing, white footwear, white legwear, white socks, white thigh highs, white \u001b[0m\n",
3374
- "\u001b[3mthigh socks, wink, msi, mythology, opera \\\u001b[0m\u001b[1;3m(\u001b[0m\u001b[3mbrowser\\\u001b[0m\u001b[1;3m)\u001b[0m\u001b[3m, programming socks, voicemod, aura \\\u001b[0m\u001b[1;3m(\u001b[0m\u001b[3moperagx\\\u001b[0m\u001b[1;3m)\u001b[0m\u001b[3m, msi dragon \u001b[0m\n",
3375
- "\u001b[3mlucky, voicemod protogen, domestic cat, dragon, feline, furred dragon, protogen, robot, scalie\u001b[0m\n"
3376
- ]
3377
- },
3378
- "metadata": {},
3379
- "output_type": "display_data"
3380
- },
3381
- {
3382
- "data": {
3383
- "text/html": [
3384
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3926617.j</span><span style=\"font-weight: bold\">son</span>\n",
3385
- "</pre>\n"
3386
- ],
3387
- "text/plain": [
3388
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3926617.j\u001b[0m\u001b[1mson\u001b[0m\n"
3389
- ]
3390
- },
3391
- "metadata": {},
3392
- "output_type": "display_data"
3393
- },
3394
- {
3395
- "data": {
3396
- "text/html": [
3397
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\4ecac26b3956e2f4f2cda478cc49009d.txt</span>\n",
3398
- "</pre>\n"
3399
- ],
3400
- "text/plain": [
3401
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\4ecac26b3956e2f4f2cda478cc49009d.txt\u001b[0m\n"
3402
- ]
3403
- },
3404
- "metadata": {},
3405
- "output_type": "display_data"
3406
- },
3407
- {
3408
- "data": {
3409
- "text/html": [
3410
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">:</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold; font-style: italic\">3</span><span style=\"font-style: italic\">, ambiguous gender, black body, black fur, clothing, dialogue, footwear, fur, gesture, girly, hand </span>\n",
3411
- "<span style=\"font-style: italic\">gesture, humor, joke, kneeling, light, machine, male, pointing, simple background, socks, sunlight, tail, text, by </span>\n",
3412
- "<span style=\"font-style: italic\">lyanmyan, voicemod, voicemod protogen, protogen</span>\n",
3413
- "</pre>\n"
3414
- ],
3415
- "text/plain": [
3416
- "Writing tags: \u001b[3m:\u001b[0m\u001b[1;3;36m3\u001b[0m\u001b[3m, ambiguous gender, black body, black fur, clothing, dialogue, footwear, fur, gesture, girly, hand \u001b[0m\n",
3417
- "\u001b[3mgesture, humor, joke, kneeling, light, machine, male, pointing, simple background, socks, sunlight, tail, text, by \u001b[0m\n",
3418
- "\u001b[3mlyanmyan, voicemod, voicemod protogen, protogen\u001b[0m\n"
3419
- ]
3420
- },
3421
- "metadata": {},
3422
- "output_type": "display_data"
3423
- },
3424
- {
3425
- "data": {
3426
- "text/html": [
3427
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3936137.j</span><span style=\"font-weight: bold\">son</span>\n",
3428
- "</pre>\n"
3429
- ],
3430
- "text/plain": [
3431
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3936137.j\u001b[0m\u001b[1mson\u001b[0m\n"
3432
- ]
3433
- },
3434
- "metadata": {},
3435
- "output_type": "display_data"
3436
- },
3437
- {
3438
- "data": {
3439
- "text/html": [
3440
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\f9b83994b104a0a5338279e503680429.txt</span>\n",
3441
- "</pre>\n"
3442
- ],
3443
- "text/plain": [
3444
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\f9b83994b104a0a5338279e503680429.txt\u001b[0m\n"
3445
- ]
3446
- },
3447
- "metadata": {},
3448
- "output_type": "display_data"
3449
- },
3450
- {
3451
- "data": {
3452
- "text/html": [
3453
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">ambiguous gender, anthro, bedroom eyes, big tail, black body, black fur, blue eyes, blue inner ear, </span>\n",
3454
- "<span style=\"font-style: italic\">duo, fluffy, fluffy tail, fur, girly, hair, jack-o' pose, machine, macro, male, markings, mascot, micro, </span>\n",
3455
- "<span style=\"font-style: italic\">multicolored hair, musical note, narrowed eyes, pose, seductive, striped markings, striped tail, stripes, tail, </span>\n",
3456
- "<span style=\"font-style: italic\">tail markings, by drunkarcher, voicemod, voicemod protogen, humanoid, protogen, digital media \\</span><span style=\"font-weight: bold; font-style: italic\">(</span><span style=\"font-style: italic\">artwork\\</span><span style=\"font-weight: bold; font-style: italic\">)</span>\n",
3457
- "</pre>\n"
3458
- ],
3459
- "text/plain": [
3460
- "Writing tags: \u001b[3mambiguous gender, anthro, bedroom eyes, big tail, black body, black fur, blue eyes, blue inner ear, \u001b[0m\n",
3461
- "\u001b[3mduo, fluffy, fluffy tail, fur, girly, hair, jack-o' pose, machine, macro, male, markings, mascot, micro, \u001b[0m\n",
3462
- "\u001b[3mmulticolored hair, musical note, narrowed eyes, pose, seductive, striped markings, striped tail, stripes, tail, \u001b[0m\n",
3463
- "\u001b[3mtail markings, by drunkarcher, voicemod, voicemod protogen, humanoid, protogen, digital media \\\u001b[0m\u001b[1;3m(\u001b[0m\u001b[3martwork\\\u001b[0m\u001b[1;3m)\u001b[0m\n"
3464
- ]
3465
- },
3466
- "metadata": {},
3467
- "output_type": "display_data"
3468
- },
3469
- {
3470
- "data": {
3471
- "text/html": [
3472
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">3936805.j</span><span style=\"font-weight: bold\">son</span>\n",
3473
- "</pre>\n"
3474
- ],
3475
- "text/plain": [
3476
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m3936805.j\u001b[0m\u001b[1mson\u001b[0m\n"
3477
- ]
3478
- },
3479
- "metadata": {},
3480
- "output_type": "display_data"
3481
- },
3482
- {
3483
- "data": {
3484
- "text/html": [
3485
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\47d29a54b0831ef54f41f6f2afe2cd04.txt</span>\n",
3486
- "</pre>\n"
3487
- ],
3488
- "text/plain": [
3489
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\47d29a54b0831ef54f41f6f2afe2cd04.txt\u001b[0m\n"
3490
- ]
3491
- },
3492
- "metadata": {},
3493
- "output_type": "display_data"
3494
- },
3495
- {
3496
- "data": {
3497
- "text/html": [
3498
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">abstract background, ambiguous gender, anthro, baby bottle, blue eyes, blue mouth, blue nose, blue </span>\n",
3499
- "<span style=\"font-style: italic\">stripes, bottle, bright colors, chastity cage, chastity device, clean diaper, clothing, container, diaper, diaper </span>\n",
3500
- "<span style=\"font-style: italic\">fetish, flipping viewer off, for a head, gesture, hand gesture, hypnosis, long socks, machine, middle finger, mind </span>\n",
3501
- "<span style=\"font-style: italic\">control, musical note, paws, profanity, screen, screen face, screencap, screencap background, screencap edit, sex </span>\n",
3502
- "<span style=\"font-style: italic\">toy, solo, spiral, spiral background, stripes, text, underwear, vibrator, wand vibrator, white fingers, by </span>\n",
3503
- "<span style=\"font-style: italic\">harleyfunnycat, programming socks, twitter, voicemod, voicemod protogen, object head, protogen, robot, screen head,</span>\n",
3504
- "<span style=\"font-style: italic\">english text, meme</span>\n",
3505
- "</pre>\n"
3506
- ],
3507
- "text/plain": [
3508
- "Writing tags: \u001b[3mabstract background, ambiguous gender, anthro, baby bottle, blue eyes, blue mouth, blue nose, blue \u001b[0m\n",
3509
- "\u001b[3mstripes, bottle, bright colors, chastity cage, chastity device, clean diaper, clothing, container, diaper, diaper \u001b[0m\n",
3510
- "\u001b[3mfetish, flipping viewer off, for a head, gesture, hand gesture, hypnosis, long socks, machine, middle finger, mind \u001b[0m\n",
3511
- "\u001b[3mcontrol, musical note, paws, profanity, screen, screen face, screencap, screencap background, screencap edit, sex \u001b[0m\n",
3512
- "\u001b[3mtoy, solo, spiral, spiral background, stripes, text, underwear, vibrator, wand vibrator, white fingers, by \u001b[0m\n",
3513
- "\u001b[3mharleyfunnycat, programming socks, twitter, voicemod, voicemod protogen, object head, protogen, robot, screen head,\u001b[0m\n",
3514
- "\u001b[3menglish text, meme\u001b[0m\n"
3515
- ]
3516
- },
3517
- "metadata": {},
3518
- "output_type": "display_data"
3519
- },
3520
- {
3521
- "data": {
3522
- "text/html": [
3523
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4270812.j</span><span style=\"font-weight: bold\">son</span>\n",
3524
- "</pre>\n"
3525
- ],
3526
- "text/plain": [
3527
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m4270812.j\u001b[0m\u001b[1mson\u001b[0m\n"
3528
- ]
3529
- },
3530
- "metadata": {},
3531
- "output_type": "display_data"
3532
- },
3533
- {
3534
- "data": {
3535
- "text/html": [
3536
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\159dbfe085eea74ed6740b7df2d264e6.txt</span>\n",
3537
- "</pre>\n"
3538
- ],
3539
- "text/plain": [
3540
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\159dbfe085eea74ed6740b7df2d264e6.txt\u001b[0m\n"
3541
- ]
3542
- },
3543
- "metadata": {},
3544
- "output_type": "display_data"
3545
- },
3546
- {
3547
- "data": {
3548
- "text/html": [
3549
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">anthro, clothing, diaper, electronics, footwear, girly, headphones, legwear, machine, male, sitting, </span>\n",
3550
- "<span style=\"font-style: italic\">socks, solo, stockings, tail, underwear, by lonegreenorcacalf, voicemod, voicemod protogen, protogen</span>\n",
3551
- "</pre>\n"
3552
- ],
3553
- "text/plain": [
3554
- "Writing tags: \u001b[3manthro, clothing, diaper, electronics, footwear, girly, headphones, legwear, machine, male, sitting, \u001b[0m\n",
3555
- "\u001b[3msocks, solo, stockings, tail, underwear, by lonegreenorcacalf, voicemod, voicemod protogen, protogen\u001b[0m\n"
3556
- ]
3557
- },
3558
- "metadata": {},
3559
- "output_type": "display_data"
3560
- },
3561
- {
3562
- "data": {
3563
- "text/html": [
3564
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Processing file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold\">4429041.j</span><span style=\"font-weight: bold\">son</span>\n",
3565
- "</pre>\n"
3566
- ],
3567
- "text/plain": [
3568
- "Processing file: \u001b[1mE:\\training_dir\\voicemod_protogen\\\u001b[0m\u001b[1;36m4429041.j\u001b[0m\u001b[1mson\u001b[0m\n"
3569
- ]
3570
- },
3571
- "metadata": {},
3572
- "output_type": "display_data"
3573
- },
3574
- {
3575
- "data": {
3576
- "text/html": [
3577
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Creating caption file: <span style=\"font-weight: bold\">E:\\training_dir\\voicemod_protogen\\39d7abf951fc2a45c545ab458a422c45.txt</span>\n",
3578
- "</pre>\n"
3579
- ],
3580
- "text/plain": [
3581
- "Creating caption file: \u001b[1mE:\\training_dir\\voicemod_protogen\\39d7abf951fc2a45c545ab458a422c45.txt\u001b[0m\n"
3582
- ]
3583
- },
3584
- "metadata": {},
3585
- "output_type": "display_data"
3586
- },
3587
- {
3588
- "data": {
3589
- "text/html": [
3590
- "<pre style=\"white-space:pre;overflow-x:auto;line-height:normal;font-family:Menlo,'DejaVu Sans Mono',consolas,'Courier New',monospace\">Writing tags: <span style=\"font-style: italic\">:</span><span style=\"color: #008080; text-decoration-color: #008080; font-weight: bold; font-style: italic\">3</span><span style=\"font-style: italic\">, anthro, armwear, claws, clothing, cybernetics, dialogue, electronics, featureless crotch, finger </span>\n",
3591
- "<span style=\"font-style: italic\">claws, fingers, fluffy, fur, girly, hair, hand on hip, headgear, headphones, headset, legwear, looking at viewer, </span>\n",
3592
- "<span style=\"font-style: italic\">low-angle view, machine, male, male anthro, multicolored body, multicolored fur, multicolored hair, multicolored </span>\n",
3593
- "<span style=\"font-style: italic\">tail, pattern armwear, pattern clothing, pattern legwear, solo, speech bubble, striped armwear, striped clothing, </span>\n",
3594
- "<span style=\"font-style: italic\">striped legwear, stripes, tail, talking to viewer, thigh highs, visor, by pinkpoffinz, voicemod, voicemod protogen,</span>\n",
3595
- "<span style=\"font-style: italic\">cyborg, protogen, digital media \\</span><span style=\"font-weight: bold; font-style: italic\">(</span><span style=\"font-style: italic\">artwork\\</span><span style=\"font-weight: bold; font-style: italic\">)</span>\n",
3596
- "</pre>\n"
3597
- ],
3598
- "text/plain": [
3599
- "Writing tags: \u001b[3m:\u001b[0m\u001b[1;3;36m3\u001b[0m\u001b[3m, anthro, armwear, claws, clothing, cybernetics, dialogue, electronics, featureless crotch, finger \u001b[0m\n",
3600
- "\u001b[3mclaws, fingers, fluffy, fur, girly, hair, hand on hip, headgear, headphones, headset, legwear, looking at viewer, \u001b[0m\n",
3601
- "\u001b[3mlow-angle view, machine, male, male anthro, multicolored body, multicolored fur, multicolored hair, multicolored \u001b[0m\n",
3602
- "\u001b[3mtail, pattern armwear, pattern clothing, pattern legwear, solo, speech bubble, striped armwear, striped clothing, \u001b[0m\n",
3603
- "\u001b[3mstriped legwear, stripes, tail, talking to viewer, thigh highs, visor, by pinkpoffinz, voicemod, voicemod protogen,\u001b[0m\n",
3604
- "\u001b[3mcyborg, protogen, digital media \\\u001b[0m\u001b[1;3m(\u001b[0m\u001b[3martwork\\\u001b[0m\u001b[1;3m)\u001b[0m\n"
3605
- ]
3606
- },
3607
- "metadata": {},
3608
- "output_type": "display_data"
3609
- }
3610
- ],
3611
- "source": [
3612
- "import os\n",
3613
- "import json\n",
3614
- "from rich.console import Console\n",
3615
- "\n",
3616
- "console = Console()\n",
3617
- "\n",
3618
- "# Define tags to be ignored\n",
3619
- "ignored_tags = [\n",
3620
- " \"hi res\",\n",
3621
- " \"shaded\",\n",
3622
- " \"tagme\",\n",
3623
- " \"absurd res\",\n",
3624
- " \"detailed\",\n",
3625
- " \"dota\",\n",
3626
- " \"creative commons\",\n",
3627
- " \"cc-by-nc-nd\",\n",
3628
- " \"square enix\",\n",
3629
- " \"by conditional dnp\",\n",
3630
- " \"final fantasy xiv\",\n",
3631
- " \"final fantasy\",\n",
3632
- " \"mythological canine\",\n",
3633
- " \"ancient pokemon\",\n",
3634
- " \"felis\",\n",
3635
- " \"asian mythology\",\n",
3636
- " \"mythological scalie\",\n",
3637
- " \"widescreen\",\n",
3638
- " \"mythological creature\",\n",
3639
- " \"4k\",\n",
3640
- " \"felid\",\n",
3641
- " \"sega\",\n",
3642
- " \"hasbro\",\n",
3643
- " \"legendary pokemon\",\n",
3644
- " \"zootopia\",\n",
3645
- " \"five nights at freddy's\",\n",
3646
- " \"eeveelution\",\n",
3647
- " \"disney\",\n",
3648
- " \"canis\",\n",
3649
- " \"canine\",\n",
3650
- " \"digimon\",\n",
3651
- " \"canid\",\n",
3652
- " \"bandai namco\",\n",
3653
- " \"pokemon (species)\",\n",
3654
- " \"mammal\",\n",
3655
- " \"pokemon\", # Make sure this doesn't kill generation 1 pokemon, etc, strict tag matching!\n",
3656
- " \"nintendo\"\n",
3657
- "]\n",
3658
- "\n",
3659
- "def process_file(file_path):\n",
3660
- " try:\n",
3661
- " console.print(f\"Processing file: [bold]{file_path}[/bold]\")\n",
3662
- " with open(file_path, \"r\") as f:\n",
3663
- " data = json.load(f)\n",
3664
- "\n",
3665
- " # Parse the URL and generate filename\n",
3666
- " post_data = data.get(\"post\", {})\n",
3667
- " file_data = post_data.get(\"file\", {})\n",
3668
- " url = file_data.get(\"url\")\n",
3669
- " if url:\n",
3670
- " filename, ext = os.path.splitext(os.path.basename(url))\n",
3671
- "\n",
3672
- " # Create caption file\n",
3673
- " caption_file = f\"{filename}.txt\"\n",
3674
- " caption_path = os.path.join(os.path.dirname(file_path), caption_file)\n",
3675
- "\n",
3676
- " with open(caption_path, \"w\", encoding=\"utf-8\") as f:\n",
3677
- " console.print(f\"Creating caption file: [bold]{caption_path}[/bold]\")\n",
3678
- " # Write rating\n",
3679
- " rating = post_data.get(\"rating\", \"q\")\n",
3680
- " if rating == \"s\":\n",
3681
- " f.write(\"rating_safe, \")\n",
3682
- " elif rating == \"e\":\n",
3683
- " f.write(\"rating_explicit, \")\n",
3684
- " else:\n",
3685
- " f.write(\"rating_questionable, \")\n",
3686
- "\n",
3687
- " # Process tags\n",
3688
- " tags = []\n",
3689
- " tags_data = post_data.get(\"tags\", {})\n",
3690
- " for category, tags_list in tags_data.items():\n",
3691
- " for tag in tags_list:\n",
3692
- " # Replace underscores with spaces\n",
3693
- " tag = tag.replace(\"_\", \" \")\n",
3694
- " if tag.lower() not in ignored_tags:\n",
3695
- " processed_tag = process_tag(tag, category)\n",
3696
- " if processed_tag:\n",
3697
- " tags.append(processed_tag)\n",
3698
- "\n",
3699
- " # Check if there are any valid tags before writing\n",
3700
- " if tags:\n",
3701
- " # Join tags with commas and write to file\n",
3702
- " tags_line = \", \".join(tags)\n",
3703
- " f.write(tags_line.strip())\n",
3704
- " console.print(f\"Writing tags: [italic]{tags_line.strip()}[/italic]\")\n",
3705
- "\n",
3706
- " except Exception as e:\n",
3707
- " console.print(f\"Error processing file: [bold]{file_path}[/bold]\")\n",
3708
- " console.print(e)\n",
3709
- "\n",
3710
- "def process_tag(tag, category=None):\n",
3711
- " if tag.isdigit() or all(part.isdigit() for part in tag.split(':')):\n",
3712
- " return \"\"\n",
3713
- " tag = tag.replace(\"_\", \" \")\n",
3714
- " if category == \"artist\":\n",
3715
- " return f\"by {tag}\"\n",
3716
- " else:\n",
3717
- " tag = tag.replace(\"(\", \"\\\\(\").replace(\")\", \"\\\\)\")\n",
3718
- " return tag\n",
3719
- "\n",
3720
- "def recursive_process(directory):\n",
3721
- " for root, dirs, files in os.walk(directory):\n",
3722
- " for file in files:\n",
3723
- " if file.endswith(\".json\"):\n",
3724
- " file_path = os.path.join(root, file)\n",
3725
- " process_file(file_path)\n",
3726
- "\n",
3727
- "if __name__ == \"__main__\":\n",
3728
- " root_directory = r\"E:\\training_dir\\voicemod_protogen\"\n",
3729
- " recursive_process(root_directory)"
3730
- ]
3731
  }
3732
  ],
3733
  "metadata": {
 
3193
  " root_directory = r\"E:\\training_dir\\wickerbeast\"\n",
3194
  " recursive_process(root_directory)"
3195
  ]
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3196
  }
3197
  ],
3198
  "metadata": {