yiff_toolkit / scripts /check_captions_for_single_line.py
k4d3's picture
awoo
d2d36fa
raw
history blame
1.78 kB
"""
This script is designed to search for .txt files in a given directory and its subdirectories.
It skips files named 'sample-prompts.txt' and any files ending with '-sample-prompts.txt'.
For each .txt file, it checks if the file contains more than one line.
If a file contains more than one line, it prints the file name.
The script uses the glob and os modules to perform the file search and manipulation.
It starts the search from a specified directory path.
The script is executed by calling the check_files function with the directory path as an argument.
"""
import glob
import os
def check_files(path):
    """
    Print the name of every .txt file under a directory that has more than one line.

    The directory and all of its subdirectories are searched recursively.
    Files named 'sample-prompts.txt' and files whose names end with
    '-sample-prompts.txt' are skipped.

    Args:
        path (str): The directory path where the search begins. A trailing
            path separator is optional.

    Returns:
        None. The function prints the file names directly.
    """
    # Build the pattern with os.path.join rather than string concatenation:
    # '**' is only recursive when it is a complete path component, so
    # "dir" + "**/*.txt" would silently search the wrong place whenever the
    # caller omits the trailing separator.
    pattern = os.path.join(path, "**", "*.txt")
    for filename in glob.iglob(pattern, recursive=True):
        # A directory whose name ends in '.txt' also matches the pattern and
        # cannot be opened as a text file.
        if not os.path.isfile(filename):
            continue
        # Skip 'sample-prompts.txt' and '*-sample-prompts.txt' files
        basename = os.path.basename(filename)
        if basename == "sample-prompts.txt" or basename.endswith(
            "-sample-prompts.txt"
        ):
            continue
        # Only the existence of a second line matters, so read at most two
        # lines instead of loading the whole file into memory.
        with open(filename, "r", encoding="utf-8") as file:
            next(file, None)
            has_second_line = next(file, None) is not None
        if has_second_line:
            print(filename)
# Run the check only when this file is executed as a script, so that importing
# the module does not trigger a scan of the hard-coded training directory.
if __name__ == "__main__":
    check_files("E:\\training_dir\\")