keeeeenw committed on
Commit
291dd47
·
verified ·
1 Parent(s): 851a93c

Upload upload_to_huggingface.py with huggingface_hub

Browse files
Files changed (1) hide show
  1. upload_to_huggingface.py +36 -0
upload_to_huggingface.py ADDED
@@ -0,0 +1,36 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
"""Upload a thinned-out selection of new training checkpoints to the Hugging Face Hub.

The script looks in the current working directory for entries whose names
start with ``step-000``, skips everything up to and including the last name
recorded in ``checkpoint_uploaded.txt``, picks at most ten of the remaining
entries, and uploads each one with ``huggingface-cli upload``.  Every
successful upload is appended to the log so the next run resumes after it.
"""
import os
import subprocess
import sys

# Name prefix that identifies a checkpoint entry in the working directory.
CHECKPOINT_PREFIX = "step-000"
# Text file holding one uploaded checkpoint name per line, oldest first.
UPLOAD_LOG = "checkpoint_uploaded.txt"
# Hub repository that receives the checkpoints.
REPO = "keeeeenw/MicroLlama2-checkpoints"
# Upper bound on the number of checkpoints uploaded per run.
MAX_UPLOADS = 10


def list_checkpoints(directory=".", prefix=CHECKPOINT_PREFIX):
    """Return the sorted names in *directory* that start with *prefix*."""
    return sorted(name for name in os.listdir(directory) if name.startswith(prefix))


def read_last_uploaded(log_path=UPLOAD_LOG):
    """Return the last line of the upload log, or ``""`` if there is none.

    A missing log file is treated the same as an empty one: nothing has been
    uploaded yet.
    """
    try:
        with open(log_path, "r") as log_file:
            uploaded = log_file.read().splitlines()
    except FileNotFoundError:
        return ""
    return uploaded[-1] if uploaded else ""


def pending_checkpoints(checkpoints, last_uploaded):
    """Return the checkpoints that sort strictly after *last_uploaded*.

    The comparison is a plain string comparison, so it is only meaningful
    when checkpoint names sort lexicographically in training order.
    """
    if not last_uploaded:
        return list(checkpoints)
    return [name for name in checkpoints if name > last_uploaded]


def select_checkpoints(remaining, limit=MAX_UPLOADS):
    """Pick at most *limit* checkpoints from *remaining* using a fixed stride.

    The stride is ``len(remaining) // limit`` (at least 1), starting from the
    first element.  With fewer than ``2 * limit`` entries the stride is 1, so
    the result is simply the first *limit* entries; the rest stay pending for
    a later run because they sort after the last uploaded name.
    """
    if not remaining or limit <= 0:
        return []
    stride = max(1, len(remaining) // limit)
    return remaining[::stride][:limit]


def upload_checkpoints(selected, repo=REPO, log_path=UPLOAD_LOG, cli="huggingface-cli"):
    """Upload each checkpoint in *selected* and log the ones that succeed.

    Uploading stops at the first failure.  The failed checkpoint is NOT
    written to the log: later runs only look at names after the last logged
    one, so logging a failed upload (or continuing past it) would mean it is
    never retried.

    Returns:
        ``True`` if every upload succeeded, ``False`` otherwise.
    """
    with open(log_path, "a") as log_file:
        for checkpoint in selected:
            # The checkpoint name is passed twice: once as the local path and
            # once as the destination path inside the repository.
            command = [cli, "upload", repo, checkpoint, checkpoint]
            print(f"Uploading {checkpoint}...")
            try:
                # Argument list, no shell: names containing spaces or shell
                # metacharacters are passed through verbatim.
                result = subprocess.run(command, check=False)
            except OSError as err:
                print(f"Could not run {cli}: {err}", file=sys.stderr)
                return False
            if result.returncode != 0:
                print(
                    f"Upload of {checkpoint} failed with exit code {result.returncode}",
                    file=sys.stderr,
                )
                return False
            log_file.write(checkpoint + "\n")
            # Flush immediately so an interrupted run keeps an accurate log.
            log_file.flush()
    return True


def main():
    """Run one upload pass; return a process exit code (0 on success)."""
    checkpoints = list_checkpoints()

    last_uploaded = read_last_uploaded()
    print("Last file uploaded", last_uploaded)

    remaining_checkpoints = pending_checkpoints(checkpoints, last_uploaded)
    print("Remaining files", remaining_checkpoints)

    selected_checkpoints = select_checkpoints(remaining_checkpoints)
    print("Files to upload", selected_checkpoints)

    return 0 if upload_checkpoints(selected_checkpoints) else 1


if __name__ == "__main__":
    sys.exit(main())