awoo
Browse filesSigned-off-by: Balazs Horvath <[email protected]>
scripts/check_captions_for_single_line.py
ADDED
@@ -0,0 +1,45 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This script is designed to search for .txt files in a given directory and its subdirectories.
|
3 |
+
It skips files named 'sample-prompts.txt' and any files ending with '-sample-prompts.txt'.
|
4 |
+
For each .txt file, it checks if the file contains more than one line.
|
5 |
+
If a file contains more than one line, it prints the file name.
|
6 |
+
|
7 |
+
The script uses the glob and os modules to perform the file search and manipulation.
|
8 |
+
It starts the search from a specified directory path.
|
9 |
+
|
10 |
+
The script is executed by calling the check_files function with the directory path as an argument.
|
11 |
+
"""
|
12 |
+
|
13 |
+
import glob
|
14 |
+
import os
|
15 |
+
|
16 |
+
|
17 |
+
def check_files(path):
    """
    Print the name of every caption .txt file that has more than one line.

    The search covers the given directory and all of its subdirectories.
    Files named 'sample-prompts.txt' and files whose name ends with
    '-sample-prompts.txt' are skipped.

    Args:
        path (str): The directory path where the search begins. A trailing
            path separator is optional.

    Returns:
        None. The function prints the file names directly.
    """
    # os.path.join inserts the separator only when it is missing, so both
    # "dir" and "dir/" yield the pattern "dir/**/*.txt". Plain string
    # concatenation would turn "dir" into "dir**/*.txt".
    pattern = os.path.join(path, "**/*.txt")
    for filename in glob.iglob(pattern, recursive=True):
        name = os.path.basename(filename)
        # Skip 'sample-prompts.txt' and '*-sample-prompts.txt' files
        if name == "sample-prompts.txt" or name.endswith("-sample-prompts.txt"):
            continue
        with open(filename, "r", encoding="utf-8") as file:
            # Only the existence of a second line matters, so stop reading
            # as soon as it has been seen instead of loading the whole file.
            has_second_line = (
                next(file, None) is not None and next(file, None) is not None
            )
        if has_second_line:
            print(filename)
|
42 |
+
|
43 |
+
|
44 |
+
if __name__ == "__main__":
    import sys

    # Run the scan only when executed as a script, so importing this module
    # has no side effects. The directory may be given as the first
    # command-line argument; without one the original default is used.
    check_files(sys.argv[1] if len(sys.argv) > 1 else "E:\\training_dir\\")
|
scripts/{check_img_resolutions.py → check_for_large_images.py}
RENAMED
File without changes
|
scripts/search_for_tag.py
ADDED
@@ -0,0 +1,59 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
"""
|
2 |
+
This script is used to search for the word "anthrofied" in all .txt files
|
3 |
+
within a specified directory and its subdirectories. It uses multiprocessing
|
4 |
+
to speed up the search by checking multiple files simultaneously.
|
5 |
+
"""
|
6 |
+
|
7 |
+
import glob
|
8 |
+
import os
|
9 |
+
import multiprocessing
|
10 |
+
|
11 |
+
|
12 |
+
def check_file(filename, term="anthrofied"):
    """
    Check whether a .txt file contains a given piece of text.

    Files named 'sample-prompts.txt' and files whose name ends with
    '-sample-prompts.txt' are never reported.

    Args:
        filename (str): The path of the file to check.
        term (str): The text to look for (plain substring match).
            Defaults to "anthrofied".

    Returns:
        str: The filename if the file contains the term, otherwise None.
    """
    name = os.path.basename(filename)
    # Skip 'sample-prompts.txt' and '*-sample-prompts.txt' files
    if name == "sample-prompts.txt" or name.endswith("-sample-prompts.txt"):
        return None
    # Read the whole file and look for the term anywhere in it
    with open(filename, "r", encoding="utf-8") as file:
        content = file.read()
    return filename if term in content else None
|
33 |
+
|
34 |
+
|
35 |
+
def check_files(path):
    """
    Print every .txt file under a directory that check_file reports.

    The directory and all of its subdirectories are searched, and the files
    are checked in parallel by a pool of worker processes.

    Args:
        path (str): The path of the directory to search. A trailing path
            separator is optional.
    """
    # os.path.join inserts the separator only when it is missing, so both
    # "dir" and "dir/" yield the pattern "dir/**/*.txt". Plain string
    # concatenation would turn "dir" into "dir**/*.txt".
    pattern = os.path.join(path, "**/*.txt")
    filenames = glob.glob(pattern, recursive=True)

    # Check the files in parallel; map() keeps the results in input order
    with multiprocessing.Pool() as pool:
        results = pool.map(check_file, filenames)

    # check_file returns None for files that should not be reported
    for result in results:
        if result is not None:
            print(result)
|
55 |
+
|
56 |
+
|
57 |
+
if __name__ == "__main__":
    import sys

    # The directory may be given as the first command-line argument;
    # without one the original default directory is used.
    check_files(sys.argv[1] if len(sys.argv) > 1 else "E:\\training_dir\\")
|
scripts/search_for_tags.py
ADDED
@@ -0,0 +1,97 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
# Define the search terms
|
2 |
+
SEARCH_TERMS = [
|
3 |
+
r"anthrofied",
|
4 |
+
r"canid",
|
5 |
+
r"canis",
|
6 |
+
r"pokemorph",
|
7 |
+
r"pokephilia",
|
8 |
+
r"alpha channel",
|
9 |
+
r"mythological scalie",
|
10 |
+
r"mythological creature",
|
11 |
+
r"collaboration",
|
12 |
+
r"mythology",
|
13 |
+
r"mammal,",
|
14 |
+
r"grandfathered content",
|
15 |
+
r"mephitid",
|
16 |
+
r"generation 1 pokemon",
|
17 |
+
r"generation 2 pokemon",
|
18 |
+
r"generation 3 pokemon",
|
19 |
+
r"generation 4 pokemon",
|
20 |
+
r"generation 5 pokemon",
|
21 |
+
r"generation 6 pokemon",
|
22 |
+
r"generation 7 pokemon",
|
23 |
+
r"generation 8 pokemon",
|
24 |
+
r"generation 9 pokemon",
|
25 |
+
r"generation 10 pokemon",
|
26 |
+
]
|
27 |
+
|
28 |
+
import glob
|
29 |
+
import os
|
30 |
+
import multiprocessing
|
31 |
+
import re
|
32 |
+
from rich.console import Console
|
33 |
+
from rich.table import Table
|
34 |
+
|
35 |
+
# Initialize the console
|
36 |
+
console = Console()
|
37 |
+
|
38 |
+
|
39 |
+
def _term_pattern(term):
    """Build a regex that matches *term* without matching inside longer words."""
    # A "\b" anchor only makes sense next to a word character. Placing it
    # after punctuation (e.g. the trailing comma of "mammal,") would require
    # a word character to follow immediately, so "mammal, wolf" would never
    # match.
    prefix = r"\b" if re.match(r"\w", term[:1]) else ""
    suffix = r"\b" if re.match(r"\w", term[-1:]) else ""
    return prefix + re.escape(term) + suffix


def check_file(filename, search_terms=None):
    """
    Checks if a .txt file contains any of the specified search terms.

    Args:
        filename (str): The path of the file to check.
        search_terms (iterable of str, optional): The terms to look for.
            Defaults to the module-level SEARCH_TERMS.

    Returns:
        tuple: A tuple containing the filename and a list of found search
        terms (in the order the terms were given), or None if the file is
        skipped or contains none of the terms.
    """
    # Skip any file whose name contains 'sample-prompts.txt'
    if "sample-prompts.txt" in os.path.basename(filename):
        return None
    terms = SEARCH_TERMS if search_terms is None else search_terms
    # Read the whole file once, then test each term against it
    with open(filename, "r", encoding="utf-8") as file:
        content = file.read()
    found_terms = [
        term for term in terms if re.search(_term_pattern(term), content)
    ]
    return (filename, found_terms) if found_terms else None
|
62 |
+
|
63 |
+
|
64 |
+
def check_files(path):
    """
    Scan a directory tree for .txt files and report the search terms found.

    Every .txt file below the directory is passed to check_file by a pool of
    worker processes; the files that produced a result are listed in a table
    printed to the console.

    Args:
        path (str): The path of the directory to search.
    """
    # Collect every .txt file below the directory, at any depth
    txt_pattern = os.path.join(path, "**/*.txt")
    candidates = glob.glob(txt_pattern, recursive=True)

    # Fan the per-file checks out over worker processes
    with multiprocessing.Pool() as workers:
        outcomes = workers.map(check_file, candidates)

    # Two-column report: file path and the terms found in it
    report = Table(show_header=True, header_style="bold magenta")
    report.add_column("Filename", style="dim")
    report.add_column("Found Search Terms")

    # check_file yields None for files with nothing to report
    hits = (outcome for outcome in outcomes if outcome is not None)
    for matched_file, matched_terms in hits:
        report.add_row(matched_file, ", ".join(matched_terms))

    console.print(report)
|
93 |
+
|
94 |
+
|
95 |
+
if __name__ == "__main__":
    import sys

    # The directory may be given as the first command-line argument;
    # without one the original default directory is used.
    check_files(sys.argv[1] if len(sys.argv) > 1 else "E:\\training_dir\\")
|
tests/blotter-v1e400.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:6837cd9106d4ed13f780e956f9559bc0cdfa613e8aa84789825202107f3d4ffe
|
3 |
+
size 240590368
|