|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 50.0, |
|
"global_step": 500, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 1.9960000000000002e-05, |
|
"loss": 2.314, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"learning_rate": 1.9e-05, |
|
"loss": 2.311, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"eval_accuracy": 0.15, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27968215942383, |
|
"eval_f1": 0.08571428571428572, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 40, |
|
"eval_loss": 2.3033015727996826, |
|
"eval_precision": 0.11052631578947367, |
|
"eval_recall": 0.15, |
|
"eval_runtime": 0.2186, |
|
"eval_samples_per_second": 91.494, |
|
"eval_steps_per_second": 13.724, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 5.000797271728516, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"learning_rate": 1.8e-05, |
|
"loss": 2.2703, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 5.0, |
|
"eval_accuracy": 0.2, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.279693603515625, |
|
"eval_f1": 0.06857142857142857, |
|
"eval_gpu_ram_allocated": 4.843057632446289, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 38, |
|
"eval_loss": 2.3011314868927, |
|
"eval_precision": 0.041666666666666664, |
|
"eval_recall": 0.2, |
|
"eval_runtime": 0.2293, |
|
"eval_samples_per_second": 87.241, |
|
"eval_steps_per_second": 13.086, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 5.012126922607422, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"learning_rate": 1.704e-05, |
|
"loss": 2.0062, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 7.5, |
|
"eval_accuracy": 0.15, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27973175048828, |
|
"eval_f1": 0.07936507936507937, |
|
"eval_gpu_ram_allocated": 4.843067646026611, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 42, |
|
"eval_loss": 2.2817370891571045, |
|
"eval_precision": 0.05428571428571429, |
|
"eval_recall": 0.15, |
|
"eval_runtime": 0.208, |
|
"eval_samples_per_second": 96.173, |
|
"eval_steps_per_second": 14.426, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.985599517822266, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"learning_rate": 1.6040000000000002e-05, |
|
"loss": 1.49, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 10.0, |
|
"eval_accuracy": 0.2, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27973556518555, |
|
"eval_f1": 0.11777777777777779, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 37, |
|
"eval_loss": 2.3280632495880127, |
|
"eval_precision": 0.0869047619047619, |
|
"eval_recall": 0.2, |
|
"eval_runtime": 0.208, |
|
"eval_samples_per_second": 96.137, |
|
"eval_steps_per_second": 14.421, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.982398986816406, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"learning_rate": 1.5040000000000002e-05, |
|
"loss": 0.9424, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 12.5, |
|
"eval_accuracy": 0.25, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27980041503906, |
|
"eval_f1": 0.17333333333333334, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 42, |
|
"eval_loss": 2.3475446701049805, |
|
"eval_precision": 0.14166666666666666, |
|
"eval_recall": 0.25, |
|
"eval_runtime": 0.2046, |
|
"eval_samples_per_second": 97.736, |
|
"eval_steps_per_second": 14.66, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.944629669189453, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"learning_rate": 1.4040000000000001e-05, |
|
"loss": 0.5591, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 15.0, |
|
"eval_accuracy": 0.25, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27981948852539, |
|
"eval_f1": 0.17444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 29, |
|
"eval_loss": 2.4503185749053955, |
|
"eval_precision": 0.14523809523809522, |
|
"eval_recall": 0.25, |
|
"eval_runtime": 0.2141, |
|
"eval_samples_per_second": 93.429, |
|
"eval_steps_per_second": 14.014, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.920143127441406, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"learning_rate": 1.3080000000000002e-05, |
|
"loss": 0.2893, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 17.5, |
|
"eval_accuracy": 0.25, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27982711791992, |
|
"eval_f1": 0.17444444444444446, |
|
"eval_gpu_ram_allocated": 4.843067646026611, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 43, |
|
"eval_loss": 2.555687665939331, |
|
"eval_precision": 0.14523809523809522, |
|
"eval_recall": 0.25, |
|
"eval_runtime": 0.2265, |
|
"eval_samples_per_second": 88.288, |
|
"eval_steps_per_second": 13.243, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.961811065673828, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"learning_rate": 1.2080000000000001e-05, |
|
"loss": 0.1623, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.279876708984375, |
|
"eval_f1": 0.2411111111111111, |
|
"eval_gpu_ram_allocated": 4.843057632446289, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 2.621793270111084, |
|
"eval_precision": 0.2452380952380952, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.217, |
|
"eval_samples_per_second": 92.146, |
|
"eval_steps_per_second": 13.822, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.910972595214844, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"learning_rate": 1.1080000000000002e-05, |
|
"loss": 0.0817, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 22.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.279930114746094, |
|
"eval_f1": 0.24, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 38, |
|
"eval_loss": 2.734551191329956, |
|
"eval_precision": 0.24166666666666664, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2092, |
|
"eval_samples_per_second": 95.586, |
|
"eval_steps_per_second": 14.338, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.941272735595703, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"learning_rate": 1.008e-05, |
|
"loss": 0.0475, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 25.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.27995300292969, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 40, |
|
"eval_loss": 2.9325406551361084, |
|
"eval_precision": 0.23690476190476187, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2145, |
|
"eval_samples_per_second": 93.259, |
|
"eval_steps_per_second": 13.989, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.931392669677734, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"learning_rate": 9.080000000000001e-06, |
|
"loss": 0.0322, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 27.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.28001403808594, |
|
"eval_f1": 0.2511111111111112, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 42, |
|
"eval_loss": 3.123502254486084, |
|
"eval_precision": 0.2869047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2104, |
|
"eval_samples_per_second": 95.043, |
|
"eval_steps_per_second": 14.256, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.933628082275391, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"learning_rate": 8.08e-06, |
|
"loss": 0.0254, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 30.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.280025482177734, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 30, |
|
"eval_loss": 3.145473003387451, |
|
"eval_precision": 0.23690476190476187, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2137, |
|
"eval_samples_per_second": 93.578, |
|
"eval_steps_per_second": 14.037, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.938743591308594, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"learning_rate": 7.08e-06, |
|
"loss": 0.0195, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 32.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 36.28008270263672, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 38, |
|
"eval_loss": 3.276718854904175, |
|
"eval_precision": 0.23690476190476187, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2064, |
|
"eval_samples_per_second": 96.908, |
|
"eval_steps_per_second": 14.536, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.919792175292969, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"learning_rate": 6.08e-06, |
|
"loss": 0.0163, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 35.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.403133392333984, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.8430633544921875, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 42, |
|
"eval_loss": 3.3280787467956543, |
|
"eval_precision": 0.2369047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2124, |
|
"eval_samples_per_second": 94.164, |
|
"eval_steps_per_second": 14.125, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.970893859863281, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"learning_rate": 5.0800000000000005e-06, |
|
"loss": 0.015, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 37.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.40315246582031, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 41, |
|
"eval_loss": 3.331848621368408, |
|
"eval_precision": 0.23690476190476187, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2059, |
|
"eval_samples_per_second": 97.115, |
|
"eval_steps_per_second": 14.567, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.9642181396484375, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"learning_rate": 4.08e-06, |
|
"loss": 0.0133, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.40321350097656, |
|
"eval_f1": 0.2511111111111112, |
|
"eval_gpu_ram_allocated": 4.843057632446289, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 37, |
|
"eval_loss": 3.361743927001953, |
|
"eval_precision": 0.2869047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2122, |
|
"eval_samples_per_second": 94.248, |
|
"eval_steps_per_second": 14.137, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.96075439453125, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"learning_rate": 3.08e-06, |
|
"loss": 0.0127, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 42.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.403221130371094, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843057632446289, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 39, |
|
"eval_loss": 3.3787875175476074, |
|
"eval_precision": 0.23690476190476187, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2221, |
|
"eval_samples_per_second": 90.039, |
|
"eval_steps_per_second": 13.506, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.961738586425781, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"learning_rate": 2.08e-06, |
|
"loss": 0.0129, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 45.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.40324783325195, |
|
"eval_f1": 0.2511111111111112, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 41, |
|
"eval_loss": 3.392756700515747, |
|
"eval_precision": 0.2869047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2132, |
|
"eval_samples_per_second": 93.819, |
|
"eval_steps_per_second": 14.073, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.957637786865234, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"learning_rate": 1.08e-06, |
|
"loss": 0.0121, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 47.5, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.403297424316406, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.8430633544921875, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 37, |
|
"eval_loss": 3.389730453491211, |
|
"eval_precision": 0.2369047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2122, |
|
"eval_samples_per_second": 94.239, |
|
"eval_steps_per_second": 14.136, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.942131042480469, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"learning_rate": 8e-08, |
|
"loss": 0.0124, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 50.0, |
|
"eval_accuracy": 0.3, |
|
"eval_disk_space_total": 78.1898422241211, |
|
"eval_disk_space_used": 40.4033203125, |
|
"eval_f1": 0.23444444444444446, |
|
"eval_gpu_ram_allocated": 4.843059062957764, |
|
"eval_gpu_ram_cached": 7.046875, |
|
"eval_gpu_ram_total": 39.56402587890625, |
|
"eval_gpu_utilization": 38, |
|
"eval_loss": 3.3983218669891357, |
|
"eval_precision": 0.2369047619047619, |
|
"eval_recall": 0.3, |
|
"eval_runtime": 0.2214, |
|
"eval_samples_per_second": 90.351, |
|
"eval_steps_per_second": 13.553, |
|
"eval_system_ram_total": 83.48074722290039, |
|
"eval_system_ram_used": 4.958106994628906, |
|
"step": 500 |
|
} |
|
], |
|
"max_steps": 500, |
|
"num_train_epochs": 50, |
|
"total_flos": 168180059293920.0, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|