k4d3 committed on
Commit
d2d36fa
1 Parent(s): 3e238a0

Signed-off-by: Balazs Horvath <[email protected]>

scripts/check_captions_for_single_line.py ADDED
@@ -0,0 +1,45 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This script is designed to search for .txt files in a given directory and its subdirectories.
3
+ It skips files named 'sample-prompts.txt' and any files ending with '-sample-prompts.txt'.
4
+ For each .txt file, it checks if the file contains more than one line.
5
+ If a file contains more than one line, it prints the file name.
6
+
7
+ The script uses the glob and os modules to perform the file search and manipulation.
8
+ It starts the search from a specified directory path.
9
+
10
+ The script is executed by calling the check_files function with the directory path as an argument.
11
+ """
12
+
13
+ import glob
14
+ import os
15
+
16
+
17
def check_files(path):
    """
    Print the path of every .txt file under ``path`` that has more than one line.

    The directory is searched recursively. Files named 'sample-prompts.txt'
    and files whose name ends with '-sample-prompts.txt' are skipped.

    Args:
        path (str): The directory path where the search begins. A trailing
            path separator is optional.

    Returns:
        None. The function prints the file names directly.
    """
    # Build the pattern with os.path.join so it is correct whether or not
    # `path` ends with a separator; plain concatenation turned "dir" into
    # the pattern "dir**/*.txt", which does not search inside "dir".
    pattern = os.path.join(path, "**", "*.txt")
    for filename in glob.iglob(pattern, recursive=True):
        basename = os.path.basename(filename)
        # Skip 'sample-prompts.txt' and '*-sample-prompts.txt' files
        if basename == "sample-prompts.txt" or basename.endswith(
            "-sample-prompts.txt"
        ):
            continue
        # Only the first two lines are needed to decide; avoid loading the
        # whole file into memory.
        with open(filename, "r", encoding="utf-8") as file:
            file.readline()
            has_second_line = bool(file.readline())
        if has_second_line:
            print(filename)
42
+
43
+
44
# Run the scan only when executed as a script, so importing this module
# does not trigger a directory walk as a side effect.
if __name__ == "__main__":
    check_files("E:\\training_dir\\")
scripts/{check_img_resolutions.py → check_for_large_images.py} RENAMED
File without changes
scripts/search_for_tag.py ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ This script is used to search for the word "anthrofied" in all .txt files
3
+ within a specified directory and its subdirectories. It uses multiprocessing
4
+ to speed up the search by checking multiple files simultaneously.
5
+ """
6
+
7
+ import glob
8
+ import os
9
+ import multiprocessing
10
+
11
+
12
def check_file(filename, term="anthrofied"):
    """
    Checks if a .txt file contains a given word.

    Files named 'sample-prompts.txt' and files whose name ends with
    '-sample-prompts.txt' are never reported.

    Args:
        filename (str): The path of the file to check.
        term (str): The text to look for. Defaults to "anthrofied".

    Returns:
        str: The filename if the file contains ``term``, otherwise None.
    """
    basename = os.path.basename(filename)
    # Skip 'sample-prompts.txt' and '*-sample-prompts.txt' files
    if basename == "sample-prompts.txt" or basename.endswith("-sample-prompts.txt"):
        return None
    # Plain substring search over the whole file content
    with open(filename, "r", encoding="utf-8") as file:
        if term in file.read():
            return filename
    return None
33
+
34
+
35
def check_files(path):
    """
    Searches for the word "anthrofied" in all .txt files within a specified
    directory and its subdirectories, printing each matching file path.

    Files are checked in parallel by a pool of worker processes.

    Args:
        path (str): The path of the directory to search. A trailing path
            separator is optional.
    """
    # Build the pattern with os.path.join so it is correct whether or not
    # `path` ends with a separator; plain concatenation turned "dir" into
    # the pattern "dir**/*.txt", which does not search inside "dir".
    filenames = glob.glob(os.path.join(path, "**", "*.txt"), recursive=True)

    # Create a pool of processes and check each file with it
    with multiprocessing.Pool() as pool:
        results = pool.map(check_file, filenames)

    # check_file returns None for files that are skipped or do not match
    for result in results:
        if result is not None:
            print(result)
55
+
56
+
57
if __name__ == "__main__":
    # Directory whose .txt files are scanned
    training_root = "E:\\training_dir\\"
    check_files(training_root)
scripts/search_for_tags.py ADDED
@@ -0,0 +1,97 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
# Terms looked for in each text file
SEARCH_TERMS = [
    "anthrofied",
    "canid",
    "canis",
    "pokemorph",
    "pokephilia",
    "alpha channel",
    "mythological scalie",
    "mythological creature",
    "collaboration",
    "mythology",
    "mammal,",
    "grandfathered content",
    "mephitid",
    # "generation 1 pokemon" through "generation 10 pokemon"
    *(f"generation {number} pokemon" for number in range(1, 11)),
]
27
+
28
+ import glob
29
+ import os
30
+ import multiprocessing
31
+ import re
32
+ from rich.console import Console
33
+ from rich.table import Table
34
+
35
# Module-level rich Console instance; output is printed through it
console = Console()
37
+
38
+
39
def _term_pattern(term):
    """Build a regex matching ``term`` as a whole word or phrase."""
    # A word boundary is only meaningful next to a word character. For a term
    # such as "mammal," a trailing \b would demand a word character directly
    # after the comma, so "mammal, canid" would never match.
    prefix = r"\b" if re.fullmatch(r"\w", term[:1]) else ""
    suffix = r"\b" if re.fullmatch(r"\w", term[-1:]) else ""
    return prefix + re.escape(term) + suffix


def check_file(filename, terms=None):
    """
    Checks if a .txt file contains any of the specified search terms.

    Args:
        filename (str): The path of the file to check.
        terms (list): The terms to look for. Defaults to SEARCH_TERMS.

    Returns:
        tuple: A tuple containing the filename and a list of found search
        terms, or None if the file is skipped or no term was found.
    """
    if terms is None:
        terms = SEARCH_TERMS
    # Skip any file whose name contains 'sample-prompts.txt'
    if "sample-prompts.txt" in os.path.basename(filename):
        return None
    with open(filename, "r", encoding="utf-8") as file:
        content = file.read()
    # Terms are matched literally (escaped) and case-sensitively, and must
    # not be embedded inside a longer word.
    found_terms = [
        term for term in terms if re.search(_term_pattern(term), content)
    ]
    return (filename, found_terms) if found_terms else None
62
+
63
+
64
def check_files(path):
    """
    Scan every .txt file under a directory tree for the search terms and
    print the matches as a table.

    Args:
        path (str): The path of the directory to search.
    """
    # Collect all .txt files, descending into subdirectories
    txt_pattern = os.path.join(path, "**/*.txt")
    candidates = glob.glob(txt_pattern, recursive=True)

    # Check the files in parallel with a pool of worker processes
    with multiprocessing.Pool() as workers:
        outcomes = workers.map(check_file, candidates)

    # Two-column table: file path and the terms found in it
    report = Table(show_header=True, header_style="bold magenta")
    report.add_column("Filename", style="dim")
    report.add_column("Found Search Terms")

    for hit in outcomes:
        # check_file returns None for files with nothing to report
        if hit is None:
            continue
        matched_file, matched_terms = hit
        report.add_row(matched_file, ", ".join(matched_terms))

    # Print the table to the console
    console.print(report)
93
+
94
+
95
if __name__ == "__main__":
    # Directory whose .txt files are scanned
    training_root = "E:\\training_dir\\"
    check_files(training_root)
tests/blotter-v1e400.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6837cd9106d4ed13f780e956f9559bc0cdfa613e8aa84789825202107f3d4ffe
3
+ size 240590368