File size: 2,668 Bytes
c2cc76d ba7d855 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 |
#!/usr/bin/env python
# -*- coding: utf-8 -*-
"""
This script walks through a directory tree, identifies image files, and checks for the existence of corresponding
.caption and .tags files. For each image it then writes the tags, followed by the best caption found in the
.caption file, into the matching .txt file.
Usage:
- Place the script in the directory containing the image files.
- Run the script to concatenate .caption and .tags files into .txt files.
- Use the dry_run flag to preview the changes without writing to the .txt files.
Functions:
get_files(path): Walks through the directory and yields each image file along with its .caption, .tags and .txt paths.
select_best_caption(caption_path): Returns the longest caption in a .caption file that ends with '.', '!' or '?'.
concat(caption_path, tags_path, txt_path, dry_run=False): Writes the tags followed by the best caption into the .txt file.
"""
from pathlib import Path
import os
import re
# Lower-case suffixes of the files that are treated as images.
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl"}


def get_files(path):
    """Yield every image file under *path* together with its sidecar paths.

    The directory tree rooted at *path* is walked recursively. A file counts
    as an image when its suffix, compared case-insensitively, is listed in
    ``FILE_EXTS``.

    Args:
        path: Directory to search, as a string or ``Path``.

    Yields:
        Tuples ``(image, caption, tags, txt)`` of ``Path`` objects, where the
        last three are the image path with its suffix replaced by
        ``.caption``, ``.tags`` and ``.txt`` respectively. A notice is
        printed when the ``.caption`` or ``.tags`` file does not exist, but
        the tuple is yielded regardless.
    """
    path = Path(path)
    # Walk the directory, looking for image files
    for root, _dirs, files in os.walk(path):
        # os.walk already prefixes ``root`` with the starting path, so it
        # must not be joined onto ``path`` again: doing so turns "data"
        # into "data/data" for any relative path other than ".".
        root = Path(root)
        for name in files:
            file = root / name
            # Lower-case the suffix so that e.g. "IMG.JPG" is recognised.
            if file.suffix.lower() not in FILE_EXTS:
                continue
            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")
            if not caption.exists():
                print(f"{caption} does not exist")
            if not tags.exists():
                print(f"{tags} does not exist")
            yield file, caption, tags, txt
def select_best_caption(caption_path):
    """Return the longest complete-sentence caption stored in a caption file.

    The file is expected to hold one or more captions separated by a run of
    at least ten dashes. Newlines inside a caption are replaced by spaces.
    Only captions ending in ``.``, ``!`` or ``?`` are considered; among
    those the longest wins, and the earliest one wins a tie.

    Args:
        caption_path: Path of the ``.caption`` file, read as UTF-8.

    Returns:
        The selected caption, or ``""`` when no caption ends with sentence
        punctuation (including when the file is empty).

    Raises:
        OSError: If the file cannot be opened.
    """
    # Explicit encoding: the locale default is not UTF-8 on every platform.
    with open(caption_path, "r", encoding="utf-8") as f:
        content = f.read().strip()

    # "-{10,}" swallows the whole separator even when it is longer than ten
    # dashes, so leftover dashes never end up at the start of a caption.
    flattened = (
        chunk.replace("\n", " ").strip()
        for chunk in re.split(r"-{10,}", content)
    )
    # str.endswith is False for "", so empty chunks are dropped here as well.
    complete = [text for text in flattened if text.endswith((".", "!", "?"))]
    return max(complete, key=len, default="")
def concat(caption_path, tags_path, txt_path, dry_run=False):
    """Write the tags followed by the best caption into the ``.txt`` file.

    The caption is chosen by ``select_best_caption``. The tags file content
    is stripped of surrounding commas, spaces and newlines, and the two
    parts are joined with ``", "``. An empty part is left out so the result
    never starts or ends with a dangling separator.

    Args:
        caption_path: Path of the ``.caption`` file.
        tags_path: Path of the ``.tags`` file, read as UTF-8.
        txt_path: Path of the ``.txt`` file to write (UTF-8, overwritten).
        dry_run: When true, print the result instead of writing it.

    Returns:
        None. Nothing is written when an input file is missing or when no
        suitable caption is found; a message is printed instead.
    """
    # Skip images whose sidecar files are missing instead of crashing with
    # FileNotFoundError in the middle of a run.
    missing = [str(p) for p in (caption_path, tags_path) if not os.path.exists(p)]
    if missing:
        print(f"Skipping {txt_path}: missing {', '.join(missing)}")
        return

    best_caption = select_best_caption(caption_path)
    if not best_caption:
        print(f"No suitable caption found in {caption_path}")
        return

    with open(tags_path, "r", encoding="utf-8") as f:
        tags = f.read().strip(", \n")

    # Drop an empty tags string so the output does not begin with ", ".
    txt = ", ".join(part for part in (tags, best_caption) if part)

    if dry_run:
        print(f"{txt_path}:")
        print(txt)
        print()
    else:
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(txt)
        print(f"wrote {txt_path}")
def main(argv=None):
    """Process every image below the current directory.

    Args:
        argv: Command-line arguments to parse; ``None`` means ``sys.argv[1:]``.
            The only option is ``--dry-run``, which prints the text that
            would be written instead of touching the .txt files. Without it
            the files are written, as before.
    """
    # Imported here so the file's top-level import block stays untouched.
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge .tags and .caption files into .txt files."
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="preview the changes without writing to the .txt files",
    )
    args = parser.parse_args(argv)

    for _image, caption, tags, txt in get_files("."):
        concat(caption, tags, txt, dry_run=args.dry_run)


if __name__ == "__main__":
    main()
|