# Viewer metadata (not part of the config): file size 6,290 bytes; ref f574f24.
# Root-level group name; left unset (null) in this config.
_name: null
# Shared runtime settings: logging, seeding, device and precision flags.
# Other groups in this file read values from here through interpolation
# (${common.fp16}, ${common.memory_efficient_fp16}, ${common.tpu},
# ${common.model_parallel_size}), so these keys must stay nested under
# `common`.
common:
  _name: null
  no_progress_bar: false
  log_interval: 200
  log_format: json
  log_file: null
  tensorboard_logdir: tblog
  wandb_project: AVSP-LLM
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  # NOTE(review): machine-specific absolute path; confirm before reuse
  # on another host.
  user_dir: /home/theodore/Projects/VSP-LLM/src
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
# Evaluation-time settings; no checkpoint path or results path is set here.
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  # Quoted so the value stays the literal string '{}' rather than a mapping.
  model_overrides: '{}'
  results_path: null
# Distributed / data-parallel settings. World size, process count and
# processes per node are all 1 in this file.
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  # The next three values are interpolated from the `common` group.
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
# Data loading and validation scheduling.
dataset:
  _name: null
  num_workers: 0
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: 1
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  # Validation limits are interpolated from the training values above.
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
# Optimization schedule. `update_freq` and `lr` are single-element lists.
optimization:
  _name: null
  max_epoch: 0
  max_update: 30000
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: true
  update_freq:
    - 8
  lr:
    - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false
# Checkpoint saving/restoring. The best checkpoint is selected on
# `accuracy`, with maximize_best_checkpoint_metric set to true.
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  # Quoted so the value stays the literal string '{}' rather than a mapping.
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 2500
  keep_interval_updates: 1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  # Interpolated from the `common` group.
  model_parallel_size: ${common.model_parallel_size}
# BMUF settings. optimization.use_bmuf is false in this file, so these
# are presumably inactive defaults -- confirm against the consumer.
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  # Interpolated from the `distributed_training` group.
  distributed_world_size: ${distributed_training.distributed_world_size}
# Generation / decoding settings (beam size 5, one hypothesis returned).
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
# Language-model evaluation settings.
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  # 9223372036854775807 == 2**63 - 1.
  softmax_batch: 9223372036854775807
# Interactive-mode settings.
interactive:
  _name: null
  buffer_size: 0
  # Quoted: a bare `-` would be read as a sequence indicator, not a string.
  input: '-'
# Model definition for the `vsp_llm` architecture.
model:
  _name: vsp_llm
  # NOTE(review): machine-specific absolute path; confirm before reuse
  # on another host.
  w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
  # Same value as task.llm_ckpt_path in this file.
  llm_ckpt_path: vilm/vinallama-2.7b
  apply_mask: false
  mask_selection: static
  mask_length: 10
  mask_other: 0
  mask_prob: 0.75
  mask_channel_selection: static
  mask_channel_length: 64
  mask_channel_other: 0
  mask_channel_prob: 0.5
  layerdrop: 0.1
  dropout: 0.0
  activation_dropout: 0.1
  attention_dropout: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 1024
  decoder_embed_dim: 4096
  freeze_finetune_updates: 18000
# Task definition for `vsp_llm_training`.
task:
  _name: vsp_llm_training
  is_s2s: true
  # `data` and `label_dir` point at the same directory.
  # NOTE(review): machine-specific absolute paths; confirm before reuse
  # on another host.
  data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/100h
  label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/audio-visual/100h
  normalize: true
  labels:
    - wrd
  single_target: true
  fine_tuning: true
  stack_order_audio: 4
  max_sample_size: 500
  modalities:
    - video
    - audio
  image_aug: true
  pad_audio: true
  random_crop: false
  # Same value as model.llm_ckpt_path in this file.
  llm_ckpt_path: vilm/vinallama-2.7b
# Training criterion; accuracy reporting is enabled, and `accuracy` is the
# metric named by checkpoint.best_checkpoint_metric.
criterion:
  _name: decoder_only_language_modeling_loss
  report_accuracy: true
  label_smoothing: 0.1
# Optimizer settings (Adam).
optimizer:
  _name: adam
  # Kept as a string; quoted to make the string type explicit. The plain
  # form `(0.9,0.98)` parses to the same string value.
  adam_betas: '(0.9,0.98)'
  adam_eps: 1.0e-08
# Learning-rate schedule. warmup_steps + hold_steps + decay_steps
# (10000 + 0 + 20000) equals optimization.max_update (30000) in this file.
lr_scheduler:
  _name: tri_stage
  warmup_steps: 10000
  hold_steps: 0
  decay_steps: 20000
  final_lr_scale: 0.05
# Groups left unset in this config.
scoring: null
bpe: null
tokenizer: null