MicroLlama2-checkpoints / upload_to_huggingface.py
keeeeenw's picture
Upload upload_to_huggingface.py with huggingface_hub
291dd47 verified
raw
history blame
1.46 kB
import os
import subprocess
# Log file that records, one name per line, every checkpoint already uploaded.
UPLOAD_LOG = "checkpoint_uploaded.txt"
# Only directory entries whose name starts with this prefix count as checkpoints.
CHECKPOINT_PREFIX = "step-000"
# Upper bound on the number of checkpoints uploaded in a single run.
MAX_UPLOADS_PER_RUN = 10
# Hugging Face repository that receives the checkpoints.
repo = "keeeeenw/MicroLlama2-checkpoints"


def list_checkpoints(directory=".", prefix=CHECKPOINT_PREFIX):
    """Return the sorted names in *directory* that start with *prefix*."""
    return sorted(name for name in os.listdir(directory) if name.startswith(prefix))


def read_last_uploaded(log_path=UPLOAD_LOG):
    """Return the last line of the upload log, or "" if it is missing or empty."""
    try:
        with open(log_path, "r") as log_file:
            uploaded = log_file.read().splitlines()
    except FileNotFoundError:
        return ""
    return uploaded[-1] if uploaded else ""


def checkpoints_after(checkpoints, last_uploaded):
    """Return the checkpoints that sort strictly after *last_uploaded*.

    An empty *last_uploaded* means nothing was uploaded yet, so every
    checkpoint is returned. Comparison is plain string ordering.
    """
    if not last_uploaded:
        return list(checkpoints)
    return [name for name in checkpoints if name > last_uploaded]


def select_evenly_spaced(checkpoints, limit=MAX_UPLOADS_PER_RUN):
    """Pick at most *limit* checkpoints using a fixed stride.

    The stride is ``max(1, len(checkpoints) // limit)`` and selection starts
    at the first element; only the first *limit* picks are kept.
    """
    if not checkpoints or limit <= 0:
        return []
    stride = max(1, len(checkpoints) // limit)
    return list(checkpoints[::stride][:limit])


def upload_checkpoints(selected, repo_id=repo, log_path=UPLOAD_LOG):
    """Upload each checkpoint with ``huggingface-cli`` and log the successes.

    A checkpoint is appended to *log_path* only after its upload command
    exits with status 0. The run stops at the first failure: the last log
    line is used as the resume point, so logging a later checkpoint after a
    failed one would cause the failed one to be skipped on the next run.

    Raises:
        SystemExit: if the CLI cannot be started or an upload fails.
    """
    with open(log_path, "a") as log_file:
        for checkpoint in selected:
            # The checkpoint name is passed twice: once as the local path and
            # once as the destination path inside the repository.
            command = ["huggingface-cli", "upload", repo_id, checkpoint, checkpoint]
            print(f"Uploading {checkpoint}...")
            try:
                # Argument list, no shell: file names are never re-parsed.
                result = subprocess.run(command, check=False)
            except OSError as err:
                raise SystemExit(f"Could not run huggingface-cli: {err}") from err
            if result.returncode != 0:
                raise SystemExit(
                    f"Upload of {checkpoint} failed with exit code {result.returncode}"
                )
            log_file.write(checkpoint + "\n")
            # Persist progress immediately so an interrupted run can resume.
            log_file.flush()


def main():
    """Upload the next batch of not-yet-uploaded checkpoints."""
    checkpoints = list_checkpoints()
    last_uploaded = read_last_uploaded()
    print("Last file uploaded", last_uploaded)
    remaining_checkpoints = checkpoints_after(checkpoints, last_uploaded)
    print("Remaining files", remaining_checkpoints)
    selected_checkpoints = select_evenly_spaced(remaining_checkpoints)
    print("Files to upload", selected_checkpoints)
    upload_checkpoints(selected_checkpoints)


if __name__ == "__main__":
    main()