File size: 2,668 Bytes
c2cc76d
 
 
ba7d855
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
#!/usr/bin/env python
# -*- coding: utf-8 -*-

"""
This script walks through a directory, identifies image files, and checks for the existence of corresponding
.caption and .tags files. It then concatenates the contents of .caption and .tags files into the .txt files.

Usage:
    - Place the script in the directory containing the image files.
    - Run the script to concatenate .caption and .tags files into .txt files.
    - Use the dry_run flag to preview the changes without writing to the .txt files.

Functions:
    get_files(path): Walks through the directory and yields each image file along with its .caption, .tags and .txt paths.
    select_best_caption(caption_path): Returns the longest caption in a .caption file that ends with '.', '!' or '?'.
    concat(caption_path, tags_path, txt_path, dry_run=False): Writes the tags followed by the best caption into the .txt file.
"""
from pathlib import Path
import os
import re

# Lower-case image suffixes recognised by get_files().
FILE_EXTS = {".png", ".jpg", ".jpeg", ".tiff", ".bmp", ".gif", ".jxl"}

def get_files(path):
    """Yield every image found under *path* together with its sidecar paths.

    The directory tree is walked recursively. A file counts as an image when
    its suffix, compared case-insensitively, is listed in FILE_EXTS.

    Args:
        path: Directory to search (str or path-like).

    Yields:
        Tuples ``(image, caption, tags, txt)`` of ``pathlib.Path`` objects,
        where the last three are the image path with its suffix replaced by
        ``.caption``, ``.tags`` and ``.txt``.

    A message is printed for each missing ``.caption`` or ``.tags`` file, but
    the tuple is still yielded; the consumer decides how to handle the gap.
    """
    for root, _dirs, files in os.walk(path):
        # os.walk() already returns roots that include `path`. Joining them
        # onto `path` a second time would duplicate a relative prefix
        # ("data" -> "data/data"), so the root is used as-is.
        root = Path(root)
        for name in files:
            file = root / name
            # Compare in lower case so "IMG.PNG" is recognised as well.
            if file.suffix.lower() not in FILE_EXTS:
                continue
            caption = file.with_suffix(".caption")
            tags = file.with_suffix(".tags")
            txt = file.with_suffix(".txt")
            if not caption.exists():
                print(f"{caption} does not exist")
            if not tags.exists():
                print(f"{tags} does not exist")
            yield file, caption, tags, txt

def select_best_caption(caption_path):
    """Return the longest complete caption stored in a .caption file.

    The file may contain several candidate captions separated by a run of ten
    dashes (``----------``). Newlines inside a candidate are replaced by
    spaces and surrounding whitespace is removed. Only candidates whose last
    character is ``.``, ``!`` or ``?`` are considered; among those the longest
    wins, and the earliest one wins a tie.

    Args:
        caption_path: Path of the .caption file to read (decoded as UTF-8).

    Returns:
        The selected caption, or an empty string when no candidate ends with
        terminal punctuation.

    Raises:
        FileNotFoundError: If *caption_path* does not exist.
    """
    # Decode explicitly as UTF-8 so the result does not depend on the
    # platform's default locale encoding.
    with open(caption_path, "r", encoding="utf-8") as f:
        content = f.read().strip()

    candidates = [
        piece.replace("\n", " ").strip()
        for piece in content.split("----------")
        if piece.strip()
    ]
    complete = [c for c in candidates if c.endswith((".", "!", "?"))]
    # max() returns the first of several equally long items, so ties resolve
    # to the earliest candidate in the file.
    return max(complete, key=len, default="")

def concat(caption_path, tags_path, txt_path, dry_run=False):
    """Combine an image's tags and best caption into its .txt file.

    The output is ``"<tags>, <caption>"``, where the caption comes from
    select_best_caption() and the tags are the content of *tags_path* with
    leading/trailing commas, spaces and newlines removed. If the tags file is
    empty, only the caption is written, so the output never starts with a
    stray ``", "``.

    Nothing is written, and a message is printed instead, when either input
    file is missing or when no suitable caption is found.

    Args:
        caption_path: Path of the .caption file.
        tags_path: Path of the .tags file (decoded as UTF-8).
        txt_path: Path of the .txt file to create or overwrite (UTF-8).
        dry_run: When true, print the would-be content instead of writing it.
    """
    # A missing sidecar file skips this image rather than aborting the run.
    try:
        best_caption = select_best_caption(caption_path)
    except FileNotFoundError:
        print(f"Skipping {txt_path}: {caption_path} does not exist")
        return
    if not best_caption:
        print(f"No suitable caption found in {caption_path}")
        return

    try:
        with open(tags_path, "r", encoding="utf-8") as f:
            tags = f.read().strip(", \n")
    except FileNotFoundError:
        print(f"Skipping {txt_path}: {tags_path} does not exist")
        return

    txt = f"{tags}, {best_caption}" if tags else best_caption

    if dry_run:
        print(f"{txt_path}:")
        print(txt)
        print()
    else:
        with open(txt_path, "w", encoding="utf-8") as f:
            f.write(txt)
        print(f"wrote {txt_path}")

if __name__ == "__main__":
    # Imported here because only the command-line entry point needs it.
    import argparse

    parser = argparse.ArgumentParser(
        description="Merge .tags and .caption sidecar files into .txt files."
    )
    # Both arguments are optional; with none given the script processes the
    # current directory and writes the .txt files.
    parser.add_argument(
        "path",
        nargs="?",
        default=".",
        help="directory to scan for images (default: current directory)",
    )
    parser.add_argument(
        "--dry-run",
        action="store_true",
        help="print the resulting text instead of writing the .txt files",
    )
    args = parser.parse_args()

    for _image, caption, tags, txt in get_files(args.path):
        concat(caption, tags, txt, dry_run=args.dry_run)