{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.3699906803355079,
  "eval_steps": 318,
  "global_step": 1588,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.00023299161230195712,
      "grad_norm": 6.78125,
      "learning_rate": 2e-06,
      "loss": 0.7236,
      "step": 1
    },
    {
      "epoch": 0.023299161230195712,
      "grad_norm": 0.09033203125,
      "learning_rate": 0.0002,
      "loss": 0.3799,
      "step": 100
    },
    {
      "epoch": 0.046598322460391424,
      "grad_norm": 0.11865234375,
      "learning_rate": 0.0004,
      "loss": 0.2452,
      "step": 200
    },
    {
      "epoch": 0.06989748369058714,
      "grad_norm": 0.1396484375,
      "learning_rate": 0.0006,
      "loss": 0.2131,
      "step": 300
    },
    {
      "epoch": 0.07409133271202237,
      "eval_peoplespeech-clean-transcription_loss": 3.0843491554260254,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063,
      "eval_peoplespeech-clean-transcription_runtime": 14.6247,
      "eval_peoplespeech-clean-transcription_samples_per_second": 4.376,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
      "step": 318
    },
    {
      "epoch": 0.09319664492078285,
      "grad_norm": 0.07421875,
      "learning_rate": 0.0008,
      "loss": 0.155,
      "step": 400
    },
    {
      "epoch": 0.11649580615097857,
      "grad_norm": 0.0576171875,
      "learning_rate": 0.001,
      "loss": 0.1083,
      "step": 500
    },
    {
      "epoch": 0.13979496738117428,
      "grad_norm": 0.04736328125,
      "learning_rate": 0.0012,
      "loss": 0.0926,
      "step": 600
    },
    {
      "epoch": 0.14818266542404473,
      "eval_peoplespeech-clean-transcription_loss": 1.7947263717651367,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063,
      "eval_peoplespeech-clean-transcription_runtime": 13.9513,
      "eval_peoplespeech-clean-transcription_samples_per_second": 4.587,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.072,
      "step": 636
    },
    {
      "epoch": 0.16309412861137,
      "grad_norm": 0.04150390625,
      "learning_rate": 0.0014,
      "loss": 0.0844,
      "step": 700
    },
    {
      "epoch": 0.1863932898415657,
      "grad_norm": 0.034912109375,
      "learning_rate": 0.0016,
      "loss": 0.0793,
      "step": 800
    },
    {
      "epoch": 0.2096924510717614,
      "grad_norm": 0.032958984375,
      "learning_rate": 0.0018000000000000002,
      "loss": 0.0758,
      "step": 900
    },
    {
      "epoch": 0.2222739981360671,
      "eval_peoplespeech-clean-transcription_loss": 1.7458150386810303,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063,
      "eval_peoplespeech-clean-transcription_runtime": 14.2202,
      "eval_peoplespeech-clean-transcription_samples_per_second": 4.501,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.07,
      "step": 954
    },
    {
      "epoch": 0.23299161230195714,
      "grad_norm": 0.03125,
      "learning_rate": 0.002,
      "loss": 0.0732,
      "step": 1000
    },
    {
      "epoch": 0.25629077353215285,
      "grad_norm": 0.02783203125,
      "learning_rate": 0.0019984487567773325,
      "loss": 0.0714,
      "step": 1100
    },
    {
      "epoch": 0.27958993476234856,
      "grad_norm": 0.0289306640625,
      "learning_rate": 0.0019938003745660765,
      "loss": 0.0691,
      "step": 1200
    },
    {
      "epoch": 0.29636533084808947,
      "eval_peoplespeech-clean-transcription_loss": 1.7118018865585327,
      "eval_peoplespeech-clean-transcription_model_preparation_time": 0.0063,
      "eval_peoplespeech-clean-transcription_runtime": 14.7608,
      "eval_peoplespeech-clean-transcription_samples_per_second": 4.336,
      "eval_peoplespeech-clean-transcription_steps_per_second": 0.068,
      "step": 1272
    },
    {
      "epoch": 0.30288909599254427,
      "grad_norm": 0.027587890625,
      "learning_rate": 0.0019860708773026797,
      "loss": 0.0676,
      "step": 1300
    },
    {
      "epoch": 0.32618825722274,
      "grad_norm": 0.0257568359375,
      "learning_rate": 0.001975286910165463,
      "loss": 0.0657,
      "step": 1400
    },
    {
      "epoch": 0.3494874184529357,
      "grad_norm": 0.0244140625,
      "learning_rate": 0.0019614856477231713,
      "loss": 0.0648,
      "step": 1500
    }
  ],
  "logging_steps": 100,
  "max_steps": 6350,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 2,
  "save_steps": 1588,
  "stateful_callbacks": {
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": false
      },
      "attributes": {}
    }
  },
  "total_flos": 2.768821783434887e+18,
  "train_batch_size": 672,
  "trial_name": null,
  "trial_params": null
}