sayakpaul
/

vjepa-ckpts

Model card Files Files and versions Community

vjepa-ckpts / vitl16.yaml

sayakpaul's picture

sayakpaul HF staff

Upload folder using huggingface_hub

2d00d92 verified 8 days ago

history blame contribute delete

1.62 kB

	app: vjepa
	nodes: 16
	tasks_per_node: 8
	data:
	dataset_type: VideoDataset
	datasets:
	- /your_path_to_kinetics710_csv_file_index.csv
	- /your_path_to_ssv2_csv_file_index.csv
	- /your_path_to_howto100m_csv_file_index.csv
	decode_one_clip: true
	batch_size: 24
	num_clips: 1
	num_frames: 16
	tubelet_size: 2
	sampling_rate: 4
	crop_size: 224
	patch_size: 16
	pin_mem: true
	num_workers: 12
	filter_short_videos: false
	clip_duration: null
	data_aug:
	auto_augment: false
	motion_shift: false
	random_resize_aspect_ratio:
	- 0.75
	- 1.35
	random_resize_scale:
	- 0.3
	- 1.0
	reprob: 0.0
	logging:
	folder: /your_absolute_file_path_for_saving_logs_and_checkpoints/
	write_tag: jepa
	loss:
	loss_exp: 1.0
	reg_coeff: 0.0
	mask:
	- aspect_ratio:
	- 0.75
	- 1.5
	num_blocks: 8
	spatial_scale:
	- 0.15
	- 0.15
	temporal_scale:
	- 1.0
	- 1.0
	max_temporal_keep: 1.0
	max_keep: null
	- aspect_ratio:
	- 0.75
	- 1.5
	num_blocks: 2
	spatial_scale:
	- 0.7
	- 0.7
	temporal_scale:
	- 1.0
	- 1.0
	max_temporal_keep: 1.0
	max_keep: null
	meta:
	load_checkpoint: false
	read_checkpoint: null
	seed: 234
	eval_freq: 100
	use_sdpa: true
	dtype: bfloat16
	model:
	model_name: vit_large
	pred_depth: 12
	pred_embed_dim: 384
	uniform_power: true
	use_mask_tokens: true
	zero_init_mask_tokens: true
	optimization:
	ipe: 300
	ipe_scale: 1.25
	clip_grad: 10.0
	weight_decay: 0.04
	final_weight_decay: 0.4
	epochs: 300
	warmup: 40
	start_lr: 0.0002
	lr: 0.000625
	final_lr: 1.0e-06
	ema:
	- 0.998
	- 1.0