# Pony2Seaart
#
# This script converts Pony captions to Seaart: every "*.txt" caption file
# found under a directory gets a sibling "*.seaart" file with rewritten tags.

import glob
import os

# Pony rating tags (after spaces are turned into underscores) and the tag
# written in their place.
_RATING_TAGS = {
    "rating_explicit": "nsfw",
    "rating_safe": "safe",
    "rating_questionable": "questionable",
}


def _convert_line(line):
    """Convert one Pony caption line into one Seaart caption line.

    The line is split on ", ". A part containing a "." is treated as free-text
    caption and kept verbatim; any other part is treated as a tag, gets its
    spaces replaced by underscores and has its rating tag remapped. Tags are
    written first, caption parts last, all joined by ", ".

    Args:
        line: A single line of the source file, with or without its newline.

    Returns:
        The converted line without a trailing newline. Empty or
        whitespace-only parts are dropped, so the result never starts with
        a separator and never contains empty tags.
    """
    tags = []
    caption_parts = []
    for part in line.strip().split(", "):
        part = part.strip()
        if not part:
            # Skip blanks produced by inputs such as "a, , b".
            continue
        if "." in part:
            caption_parts.append(part)
        else:
            tag = part.replace(" ", "_")
            tags.append(_RATING_TAGS.get(tag, tag))
    # Joining one combined list avoids the leading ", " that appeared when a
    # line had a caption but no tags.
    return ", ".join(tags + caption_parts)


def pony2seaart(file_path):
    """Write a ".seaart" caption file next to the given Pony caption file.

    Files whose base name ends with "sample-prompts.txt" are skipped. The
    output file has the same directory and stem as the input and the
    extension ".seaart"; an existing file of that name is overwritten.

    Args:
        file_path: Path of the Pony caption text file to convert.

    Returns:
        None.
    """
    if os.path.basename(file_path).endswith("sample-prompts.txt"):
        return

    # Explicit UTF-8: the platform default (e.g. cp1252 on Windows) can
    # mis-decode or reject non-ASCII caption text.
    with open(file_path, "r", encoding="utf-8") as file:
        new_lines = [_convert_line(line) + "\n" for line in file]

    new_file_name = os.path.splitext(os.path.basename(file_path))[0] + ".seaart"
    new_file_path = os.path.join(os.path.dirname(file_path), new_file_name)
    with open(new_file_path, "w", encoding="utf-8") as new_file:
        new_file.writelines(new_lines)


def process_directory(directory):
    """Convert every "*.txt" file under ``directory``, including subfolders.

    Args:
        directory: Root directory that is searched recursively.

    Returns:
        None.
    """
    txt_files = glob.glob(os.path.join(directory, "**", "*.txt"), recursive=True)
    for file_path in txt_files:
        pony2seaart(file_path)


if __name__ == "__main__":
    # NOTE(review): machine-specific absolute path; adjust before running elsewhere.
    input_dir = "C:\\Users\\kade\\Desktop\\training_dir_staging"
    process_directory(input_dir)
"python", "pygments_lexer": "ipython3", "version": "3.12.3" } }, "nbformat": 4, "nbformat_minor": 2 }