File size: 6,276 Bytes
_name: null
common:
  _name: null
  no_progress_bar: false
  log_interval: 200
  log_format: json
  log_file: null
  tensorboard_logdir: tblog
  wandb_project: AVSP-LLM
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  user_dir: /home/theodore/Projects/VSP-LLM/src
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'
  results_path: null
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
dataset:
  _name: null
  num_workers: 0
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: 1
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
optimization:
  _name: null
  max_epoch: 0
  max_update: 30000
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: true
  update_freq:
  - 8
  lr:
  - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 2500
  keep_interval_updates: 1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: ${common.model_parallel_size}
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: ${distributed_training.distributed_world_size}
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807
interactive:
  _name: null
  buffer_size: 0
  input: '-'
model:
  _name: vsp_llm
  w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
  llm_ckpt_path: vilm/vinallama-2.7b
  apply_mask: false
  mask_selection: static
  mask_length: 10
  mask_other: 0
  mask_prob: 0.75
  mask_channel_selection: static
  mask_channel_length: 64
  mask_channel_other: 0
  mask_channel_prob: 0.5
  layerdrop: 0.1
  dropout: 0.0
  activation_dropout: 0.1
  attention_dropout: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 1024
  decoder_embed_dim: 4096
  freeze_finetune_updates: 18000
task:
  _name: vsp_llm_training
  is_s2s: true
  data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_2
  label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_2
  normalize: true
  labels:
  - wrd
  single_target: true
  fine_tuning: true
  stack_order_audio: 4
  max_sample_size: 500
  modalities:
  - video
  - audio
  image_aug: true
  pad_audio: true
  random_crop: false
  llm_ckpt_path: vilm/vinallama-2.7b
criterion:
  _name: decoder_only_language_modeling_loss
  report_accuracy: true
  label_smoothing: 0.1
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1.0e-08
lr_scheduler:
  _name: tri_stage
  warmup_steps: 10000
  hold_steps: 0
  decay_steps: 20000
  final_lr_scale: 0.05
scoring: null
bpe: null
tokenizer: null