|
|
|
""" |
|
This script walks a directory tree, finds image files, and checks that each
image has matching .caption and .tags files next to it. For every image it
picks the best caption from the .caption file and writes the tags followed by
that caption into a .txt file with the same base name.

Usage:
- Place the script in the directory containing the image files.
- Run the script to combine each image's .tags and .caption files into a .txt file.
- Set dry_run to True in the __main__ block to preview the output without
  writing any .txt files.

Functions:
get_files(path): Walks the directory and yields each image path together with its .caption, .tags and .txt paths.
select_best_caption(caption_path): Returns the longest caption in a .caption file that ends with '.', '!' or '?'.
concat(caption_path, tags_path, txt_path, dry_run=False): Writes the tags followed by the best caption to the .txt file.
|
""" |
|
from pathlib import Path |
|
import os |
|
import re |
|
|
|
# Image extensions (lower-case, with leading dot) that get_files() recognises.
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl"}


def get_files(path):
    """Yield every image under *path* together with its sidecar file paths.

    The directory tree rooted at *path* is walked recursively. A file counts
    as an image when its extension, compared case-insensitively, is listed in
    ``FILE_EXTS``. The ``.caption``, ``.tags`` and ``.txt`` paths are derived
    by swapping the image's suffix.

    A message is printed for each missing ``.caption`` or ``.tags`` file, but
    the image is still yielded, so callers must cope with sidecar paths that
    do not exist.

    Args:
        path: Directory to scan (``str`` or ``os.PathLike``).

    Yields:
        Tuples ``(image, caption, tags, txt)`` of ``pathlib.Path`` objects.
    """
    for root, _dirs, files in os.walk(path):
        # os.walk already reports ``root`` prefixed with the start directory;
        # joining it onto ``path`` again would duplicate a relative prefix
        # (e.g. "data/data/img.png").
        root = Path(root)
        for name in files:
            image = root / name
            # Lower-case the suffix so "IMG.PNG" or "photo.JPG" are recognised.
            if image.suffix.lower() not in FILE_EXTS:
                continue

            caption = image.with_suffix(".caption")
            tags = image.with_suffix(".tags")
            txt = image.with_suffix(".txt")

            if not caption.exists():
                print(f"{caption} does not exist")
            if not tags.exists():
                print(f"{tags} does not exist")

            yield image, caption, tags, txt
|
|
|
def select_best_caption(caption_path):
    """Return the longest complete caption stored in a ``.caption`` file.

    The file may hold several candidate captions separated by a rule of ten
    or more dashes. Each candidate is flattened onto one line with runs of
    whitespace collapsed to a single space. Only candidates ending in ``.``,
    ``!`` or ``?`` are considered; anything else is treated as cut off.

    Args:
        caption_path: Path of the UTF-8 encoded ``.caption`` file.

    Returns:
        The longest qualifying caption (the first one on ties), or ``""``
        when no candidate ends with sentence punctuation.

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding: the platform default is locale-dependent and can
    # mis-decode non-ASCII captions.
    with open(caption_path, "r", encoding="utf-8") as f:
        content = f.read()

    # "-{10,}" consumes the whole rule, so a separator longer than ten dashes
    # does not leave stray dashes glued to the following caption.
    candidates = (" ".join(chunk.split()) for chunk in re.split(r"-{10,}", content))
    complete = [text for text in candidates if text.endswith((".", "!", "?"))]

    # max() returns the first of equally long captions.
    return max(complete, key=len, default="")
|
|
|
def concat(caption_path, tags_path, txt_path, dry_run=False):
    """Write an image's tags followed by its best caption to *txt_path*.

    The caption is chosen by ``select_best_caption``. The tags file content
    is trimmed of surrounding commas, spaces and newlines, and the two parts
    are joined with ``", "``. Nothing is written when either input file is
    missing or when no suitable caption is found; a message is printed
    instead.

    Args:
        caption_path: Path of the ``.caption`` file.
        tags_path: Path of the ``.tags`` file.
        txt_path: Destination ``.txt`` path; overwritten if it exists.
        dry_run: When true, print the would-be content instead of writing it.

    Returns:
        None.
    """
    # Check both inputs up front so a missing sidecar is reported and skipped
    # instead of aborting the whole run with FileNotFoundError.
    missing = [str(p) for p in (caption_path, tags_path) if not Path(p).is_file()]
    if missing:
        print(f"Skipping {txt_path}: missing {', '.join(missing)}")
        return

    best_caption = select_best_caption(caption_path)
    if not best_caption:
        print(f"No suitable caption found in {caption_path}")
        return

    with open(tags_path, "r", encoding="utf-8") as f:
        tags = f.read().strip(", \n")

    # Join only the non-empty parts so an empty tags file does not produce a
    # leading ", ".
    txt = ", ".join(part for part in (tags, best_caption) if part)

    if dry_run:
        print(f"{txt_path}:")
        print(txt)
        print()
        return

    with open(txt_path, "w", encoding="utf-8") as f:
        f.write(txt)
    print(f"wrote {txt_path}")
|
|
|
if __name__ == "__main__":
    # Set to True to print the would-be .txt contents instead of writing them.
    dry_run = False

    # Process every image found under the current working directory; the
    # image path itself is not needed, only its sidecar paths.
    for _image, caption_file, tags_file, txt_file in get_files("."):
        concat(caption_file, tags_file, txt_file, dry_run=dry_run)
|
|
|
|