File size: 6,276 Bytes
4721857
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
_name: null
common:
  _name: null
  no_progress_bar: false
  log_interval: 200
  log_format: json
  log_file: null
  tensorboard_logdir: tblog
  wandb_project: AVSP-LLM
  azureml_logging: false
  seed: 1337
  cpu: false
  tpu: false
  bf16: false
  memory_efficient_bf16: false
  fp16: true
  memory_efficient_fp16: false
  fp16_no_flatten_grads: false
  fp16_init_scale: 128
  fp16_scale_window: null
  fp16_scale_tolerance: 0.0
  on_cpu_convert_precision: false
  min_loss_scale: 0.0001
  threshold_loss_scale: null
  amp: false
  amp_batch_retries: 2
  amp_init_scale: 128
  amp_scale_window: null
  user_dir: /home/theodore/Projects/VSP-LLM/src
  empty_cache_freq: 0
  all_gather_list_size: 16384
  model_parallel_size: 1
  quantization_config_path: null
  profile: false
  reset_logging: false
  suppress_crashes: false
  use_plasma_view: false
  plasma_path: /tmp/plasma
common_eval:
  _name: null
  path: null
  post_process: null
  quiet: false
  model_overrides: '{}'
  results_path: null
distributed_training:
  _name: null
  distributed_world_size: 1
  distributed_num_procs: 1
  distributed_rank: 0
  distributed_backend: nccl
  distributed_init_method: null
  distributed_port: -1
  device_id: 0
  distributed_no_spawn: false
  ddp_backend: no_c10d
  ddp_comm_hook: none
  bucket_cap_mb: 25
  fix_batches_to_gpus: false
  find_unused_parameters: true
  fast_stat_sync: false
  heartbeat_timeout: -1
  broadcast_buffers: false
  slowmo_momentum: null
  slowmo_algorithm: LocalSGD
  localsgd_frequency: 3
  nprocs_per_node: 1
  pipeline_model_parallel: false
  pipeline_balance: null
  pipeline_devices: null
  pipeline_chunks: 0
  pipeline_encoder_balance: null
  pipeline_encoder_devices: null
  pipeline_decoder_balance: null
  pipeline_decoder_devices: null
  pipeline_checkpoint: never
  zero_sharding: none
  fp16: ${common.fp16}
  memory_efficient_fp16: ${common.memory_efficient_fp16}
  tpu: ${common.tpu}
  no_reshard_after_forward: false
  fp32_reduce_scatter: false
  cpu_offload: false
  use_sharded_state: false
dataset:
  _name: null
  num_workers: 0
  skip_invalid_size_inputs_valid_test: false
  max_tokens: null
  batch_size: 1
  required_batch_size_multiple: 8
  required_seq_len_multiple: 1
  dataset_impl: null
  data_buffer_size: 10
  train_subset: train
  valid_subset: valid
  combine_valid_subsets: null
  ignore_unused_valid_subsets: false
  validate_interval: 1
  validate_interval_updates: 0
  validate_after_updates: 0
  fixed_validation_seed: null
  disable_validation: false
  max_tokens_valid: ${dataset.max_tokens}
  batch_size_valid: ${dataset.batch_size}
  max_valid_steps: null
  curriculum: 0
  gen_subset: test
  num_shards: 1
  shard_id: 0
optimization:
  _name: null
  max_epoch: 0
  max_update: 30000
  stop_time_hours: 0.0
  clip_norm: 0.0
  sentence_avg: true
  update_freq:
  - 8
  lr:
  - 0.0005
  stop_min_lr: -1.0
  use_bmuf: false
checkpoint:
  _name: null
  save_dir: checkpoints
  restore_file: checkpoint_last.pt
  finetune_from_model: null
  reset_dataloader: false
  reset_lr_scheduler: false
  reset_meters: false
  reset_optimizer: false
  optimizer_overrides: '{}'
  save_interval: 1
  save_interval_updates: 2500
  keep_interval_updates: 1
  keep_interval_updates_pattern: -1
  keep_last_epochs: -1
  keep_best_checkpoints: -1
  no_save: false
  no_epoch_checkpoints: true
  no_last_checkpoints: false
  no_save_optimizer_state: false
  best_checkpoint_metric: accuracy
  maximize_best_checkpoint_metric: true
  patience: -1
  checkpoint_suffix: ''
  checkpoint_shard_count: 1
  load_checkpoint_on_all_dp_ranks: false
  write_checkpoints_asynchronously: false
  model_parallel_size: ${common.model_parallel_size}
bmuf:
  _name: null
  block_lr: 1.0
  block_momentum: 0.875
  global_sync_iter: 50
  warmup_iterations: 500
  use_nbm: false
  average_sync: false
  distributed_world_size: ${distributed_training.distributed_world_size}
generation:
  _name: null
  beam: 5
  nbest: 1
  max_len_a: 0.0
  max_len_b: 200
  min_len: 1
  match_source_len: false
  unnormalized: false
  no_early_stop: false
  no_beamable_mm: false
  lenpen: 1.0
  unkpen: 0.0
  replace_unk: null
  sacrebleu: false
  score_reference: false
  prefix_size: 0
  no_repeat_ngram_size: 0
  sampling: false
  sampling_topk: -1
  sampling_topp: -1.0
  constraints: null
  temperature: 1.0
  diverse_beam_groups: -1
  diverse_beam_strength: 0.5
  diversity_rate: -1.0
  print_alignment: null
  print_step: false
  lm_path: null
  lm_weight: 0.0
  iter_decode_eos_penalty: 0.0
  iter_decode_max_iter: 10
  iter_decode_force_max_iter: false
  iter_decode_with_beam: 1
  iter_decode_with_external_reranker: false
  retain_iter_history: false
  retain_dropout: false
  retain_dropout_modules: null
  decoding_format: null
  no_seed_provided: false
eval_lm:
  _name: null
  output_word_probs: false
  output_word_stats: false
  context_window: 0
  softmax_batch: 9223372036854775807
interactive:
  _name: null
  buffer_size: 0
  input: '-'
model:
  _name: vsp_llm
  w2v_path: /home/theodore/Projects/VSP-LLM/checkpoints/large_vox_iter5.pt
  llm_ckpt_path: vilm/vinallama-2.7b
  apply_mask: false
  mask_selection: static
  mask_length: 10
  mask_other: 0
  mask_prob: 0.75
  mask_channel_selection: static
  mask_channel_length: 64
  mask_channel_other: 0
  mask_channel_prob: 0.5
  layerdrop: 0.1
  dropout: 0.0
  activation_dropout: 0.1
  attention_dropout: 0.0
  feature_grad_mult: 1.0
  encoder_embed_dim: 1024
  decoder_embed_dim: 4096
  freeze_finetune_updates: 18000
task:
  _name: vsp_llm_training
  is_s2s: true
  data: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_2
  label_dir: /home/theodore/Projects/VSP-LLM/data/processed/vasr/100h/1_2_2
  normalize: true
  labels:
  - wrd
  single_target: true
  fine_tuning: true
  stack_order_audio: 4
  max_sample_size: 500
  modalities:
  - video
  - audio
  image_aug: true
  pad_audio: true
  random_crop: false
  llm_ckpt_path: vilm/vinallama-2.7b
criterion:
  _name: decoder_only_language_modeling_loss
  report_accuracy: true
  label_smoothing: 0.1
optimizer:
  _name: adam
  adam_betas: (0.9,0.98)
  adam_eps: 1.0e-08
lr_scheduler:
  _name: tri_stage
  warmup_steps: 10000
  hold_steps: 0
  decay_steps: 20000
  final_lr_scale: 0.05
scoring: null
bpe: null
tokenizer: null