Upload upload_to_huggingface.py with huggingface_hub
Browse files- upload_to_huggingface.py +36 -0
upload_to_huggingface.py
ADDED
@@ -0,0 +1,36 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
"""Upload a sample of local training checkpoints to the Hugging Face Hub.

The script looks in the current directory for entries whose names start with
``step-000``, skips everything up to and including the last checkpoint
recorded in ``checkpoint_uploaded.txt``, picks at most ten evenly spaced
checkpoints from what is left, and uploads each one with
``huggingface-cli upload``.  Every successful upload is appended to the log
file so that a later run resumes after it.
"""
import os
import subprocess

# Name prefix that identifies a checkpoint entry in the working directory.
CHECKPOINT_PREFIX = "step-000"
# One uploaded checkpoint name per line; the last line is the resume point.
UPLOAD_LOG = "checkpoint_uploaded.txt"
# Hub repository that receives the checkpoints.
REPO = "keeeeenw/MicroLlama2-checkpoints"
# Upper bound on the number of checkpoints uploaded per run.
MAX_UPLOADS = 10


def list_checkpoints(directory=".", prefix=CHECKPOINT_PREFIX):
    """Return the sorted names in *directory* that start with *prefix*."""
    return sorted(name for name in os.listdir(directory) if name.startswith(prefix))


def read_last_uploaded(log_path=UPLOAD_LOG):
    """Return the last checkpoint name recorded in *log_path*.

    Returns an empty string when the log does not exist or holds no entries.
    Blank lines are ignored so a stray empty line cannot reset the resume
    point.
    """
    try:
        with open(log_path, "r") as log_file:
            entries = [line.strip() for line in log_file if line.strip()]
    except FileNotFoundError:
        return ""
    return entries[-1] if entries else ""


def remaining_after(checkpoints, last_uploaded):
    """Return the checkpoints that sort strictly after *last_uploaded*.

    An empty *last_uploaded* means nothing was uploaded yet, so every
    checkpoint is returned.  The comparison is a plain string comparison.
    """
    if not last_uploaded:
        return list(checkpoints)
    return [name for name in checkpoints if name > last_uploaded]


def select_evenly_spaced(items, count=MAX_UPLOADS):
    """Return at most *count* items spread evenly across *items*.

    When there are no more than *count* items, all of them are returned.
    Otherwise item ``i * len(items) // count`` is taken for each ``i`` in
    ``range(count)``.  The first item is always included and the picks span
    the whole list, instead of clustering at the front when the length is
    not a multiple of *count*.
    """
    items = list(items)
    total = len(items)
    if count <= 0 or total == 0:
        return []
    if total <= count:
        return items
    # total > count guarantees consecutive indices differ by at least one,
    # so no item is selected twice.
    return [items[i * total // count] for i in range(count)]


def upload_checkpoint(repo, checkpoint):
    """Upload *checkpoint* to *repo* via ``huggingface-cli``.

    Returns True when the command exits with status 0, False otherwise
    (including when the command cannot be started at all).
    """
    # An argument list (no shell) keeps names containing spaces or shell
    # metacharacters intact.
    command = ["huggingface-cli", "upload", repo, checkpoint, checkpoint]
    try:
        completed = subprocess.run(command, check=False)
    except OSError as err:
        print(f"Could not run huggingface-cli: {err}")
        return False
    return completed.returncode == 0


def main():
    """Run one upload pass; return 0 on success and 1 if an upload failed."""
    checkpoints = list_checkpoints()

    last_uploaded = read_last_uploaded()
    print("Last file uploaded", last_uploaded)

    remaining_checkpoints = remaining_after(checkpoints, last_uploaded)
    print("Remaining files", remaining_checkpoints)

    selected_checkpoints = select_evenly_spaced(remaining_checkpoints, MAX_UPLOADS)
    print("Files to upload", selected_checkpoints)

    with open(UPLOAD_LOG, "a") as log_file:
        for checkpoint in selected_checkpoints:
            print(f"Uploading {checkpoint}...")
            if not upload_checkpoint(REPO, checkpoint):
                # Stop here: logging a later checkpoint would move the resume
                # point past one that never reached the Hub.
                print(f"Upload of {checkpoint} failed; stopping.")
                return 1
            log_file.write(checkpoint + "\n")
            # Persist progress immediately so an interrupted run does not
            # forget uploads that already succeeded.
            log_file.flush()
    return 0


if __name__ == "__main__":
    raise SystemExit(main())