dataset
Browse files
scripts/prepare_pretrain_dataset.py
CHANGED
@@ -176,4 +176,5 @@ outputs = optimize(
|
|
176 |
output_dir='../data/',
|
177 |
# Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
|
178 |
chunk_size=(2049 * 8012),
|
|
|
179 |
)
|
|
|
176 |
output_dir='../data/',
|
177 |
# Number of tokens to store by chunks. This is roughly 64MB of tokens per chunk.
|
178 |
chunk_size=(2049 * 8012),
|
179 |
+
num_workers=16,
|
180 |
)
|