{ "best_metric": null, "best_model_checkpoint": null, "epoch": 50.0, "global_step": 500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1, "learning_rate": 1.9960000000000002e-05, "loss": 2.314, "step": 1 }, { "epoch": 2.5, "learning_rate": 1.9e-05, "loss": 2.311, "step": 25 }, { "epoch": 2.5, "eval_accuracy": 0.15, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27968215942383, "eval_f1": 0.08571428571428572, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 40, "eval_loss": 2.3033015727996826, "eval_precision": 0.11052631578947367, "eval_recall": 0.15, "eval_runtime": 0.2186, "eval_samples_per_second": 91.494, "eval_steps_per_second": 13.724, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 5.000797271728516, "step": 25 }, { "epoch": 5.0, "learning_rate": 1.8e-05, "loss": 2.2703, "step": 50 }, { "epoch": 5.0, "eval_accuracy": 0.2, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.279693603515625, "eval_f1": 0.06857142857142857, "eval_gpu_ram_allocated": 4.843057632446289, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 38, "eval_loss": 2.3011314868927, "eval_precision": 0.041666666666666664, "eval_recall": 0.2, "eval_runtime": 0.2293, "eval_samples_per_second": 87.241, "eval_steps_per_second": 13.086, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 5.012126922607422, "step": 50 }, { "epoch": 7.5, "learning_rate": 1.704e-05, "loss": 2.0062, "step": 75 }, { "epoch": 7.5, "eval_accuracy": 0.15, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27973175048828, "eval_f1": 0.07936507936507937, "eval_gpu_ram_allocated": 4.843067646026611, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 42, "eval_loss": 2.2817370891571045, "eval_precision": 0.05428571428571429, "eval_recall": 0.15, "eval_runtime": 0.208, "eval_samples_per_second": 96.173, "eval_steps_per_second": 14.426, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.985599517822266, "step": 75 }, { "epoch": 10.0, "learning_rate": 1.6040000000000002e-05, "loss": 1.49, "step": 100 }, { "epoch": 10.0, "eval_accuracy": 0.2, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27973556518555, "eval_f1": 0.11777777777777779, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 37, "eval_loss": 2.3280632495880127, "eval_precision": 0.0869047619047619, "eval_recall": 0.2, "eval_runtime": 0.208, "eval_samples_per_second": 96.137, "eval_steps_per_second": 14.421, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.982398986816406, "step": 100 }, { "epoch": 12.5, "learning_rate": 1.5040000000000002e-05, "loss": 0.9424, "step": 125 }, { "epoch": 12.5, "eval_accuracy": 0.25, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27980041503906, "eval_f1": 0.17333333333333334, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 42, "eval_loss": 2.3475446701049805, "eval_precision": 0.14166666666666666, "eval_recall": 0.25, "eval_runtime": 0.2046, "eval_samples_per_second": 97.736, "eval_steps_per_second": 14.66, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.944629669189453, "step": 125 }, { "epoch": 15.0, "learning_rate": 1.4040000000000001e-05, "loss": 0.5591, "step": 150 }, { "epoch": 15.0, "eval_accuracy": 0.25, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27981948852539, "eval_f1": 0.17444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 29, "eval_loss": 2.4503185749053955, "eval_precision": 0.14523809523809522, "eval_recall": 0.25, "eval_runtime": 0.2141, "eval_samples_per_second": 93.429, "eval_steps_per_second": 14.014, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.920143127441406, "step": 150 }, { "epoch": 17.5, "learning_rate": 1.3080000000000002e-05, "loss": 0.2893, "step": 175 }, { "epoch": 17.5, "eval_accuracy": 0.25, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27982711791992, "eval_f1": 0.17444444444444446, "eval_gpu_ram_allocated": 4.843067646026611, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 43, "eval_loss": 2.555687665939331, "eval_precision": 0.14523809523809522, "eval_recall": 0.25, "eval_runtime": 0.2265, "eval_samples_per_second": 88.288, "eval_steps_per_second": 13.243, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.961811065673828, "step": 175 }, { "epoch": 20.0, "learning_rate": 1.2080000000000001e-05, "loss": 0.1623, "step": 200 }, { "epoch": 20.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.279876708984375, "eval_f1": 0.2411111111111111, "eval_gpu_ram_allocated": 4.843057632446289, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 2.621793270111084, "eval_precision": 0.2452380952380952, "eval_recall": 0.3, "eval_runtime": 0.217, "eval_samples_per_second": 92.146, "eval_steps_per_second": 13.822, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.910972595214844, "step": 200 }, { "epoch": 22.5, "learning_rate": 1.1080000000000002e-05, "loss": 0.0817, "step": 225 }, { "epoch": 22.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.279930114746094, "eval_f1": 0.24, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 38, "eval_loss": 2.734551191329956, "eval_precision": 0.24166666666666664, "eval_recall": 0.3, "eval_runtime": 0.2092, "eval_samples_per_second": 95.586, "eval_steps_per_second": 14.338, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.941272735595703, "step": 225 }, { "epoch": 25.0, "learning_rate": 1.008e-05, "loss": 0.0475, "step": 250 }, { "epoch": 25.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.27995300292969, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 40, "eval_loss": 2.9325406551361084, "eval_precision": 0.23690476190476187, "eval_recall": 0.3, "eval_runtime": 0.2145, "eval_samples_per_second": 93.259, "eval_steps_per_second": 13.989, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.931392669677734, "step": 250 }, { "epoch": 27.5, "learning_rate": 9.080000000000001e-06, "loss": 0.0322, "step": 275 }, { "epoch": 27.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.28001403808594, "eval_f1": 0.2511111111111112, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 42, "eval_loss": 3.123502254486084, "eval_precision": 0.2869047619047619, "eval_recall": 0.3, "eval_runtime": 0.2104, "eval_samples_per_second": 95.043, "eval_steps_per_second": 14.256, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.933628082275391, "step": 275 }, { "epoch": 30.0, "learning_rate": 8.08e-06, "loss": 0.0254, "step": 300 }, { "epoch": 30.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.280025482177734, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 30, "eval_loss": 3.145473003387451, "eval_precision": 0.23690476190476187, "eval_recall": 0.3, "eval_runtime": 0.2137, "eval_samples_per_second": 93.578, "eval_steps_per_second": 14.037, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.938743591308594, "step": 300 }, { "epoch": 32.5, "learning_rate": 7.08e-06, "loss": 0.0195, "step": 325 }, { "epoch": 32.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 36.28008270263672, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 38, "eval_loss": 3.276718854904175, "eval_precision": 0.23690476190476187, "eval_recall": 0.3, "eval_runtime": 0.2064, "eval_samples_per_second": 96.908, "eval_steps_per_second": 14.536, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.919792175292969, "step": 325 }, { "epoch": 35.0, "learning_rate": 6.08e-06, "loss": 0.0163, "step": 350 }, { "epoch": 35.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.403133392333984, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.8430633544921875, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 42, "eval_loss": 3.3280787467956543, "eval_precision": 0.2369047619047619, "eval_recall": 0.3, "eval_runtime": 0.2124, "eval_samples_per_second": 94.164, "eval_steps_per_second": 14.125, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.970893859863281, "step": 350 }, { "epoch": 37.5, "learning_rate": 5.0800000000000005e-06, "loss": 0.015, "step": 375 }, { "epoch": 37.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.40315246582031, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 41, "eval_loss": 3.331848621368408, "eval_precision": 0.23690476190476187, "eval_recall": 0.3, "eval_runtime": 0.2059, "eval_samples_per_second": 97.115, "eval_steps_per_second": 14.567, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.9642181396484375, "step": 375 }, { "epoch": 40.0, "learning_rate": 4.08e-06, "loss": 0.0133, "step": 400 }, { "epoch": 40.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.40321350097656, "eval_f1": 0.2511111111111112, "eval_gpu_ram_allocated": 4.843057632446289, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 37, "eval_loss": 3.361743927001953, "eval_precision": 0.2869047619047619, "eval_recall": 0.3, "eval_runtime": 0.2122, "eval_samples_per_second": 94.248, "eval_steps_per_second": 14.137, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.96075439453125, "step": 400 }, { "epoch": 42.5, "learning_rate": 3.08e-06, "loss": 0.0127, "step": 425 }, { "epoch": 42.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.403221130371094, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843057632446289, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 39, "eval_loss": 3.3787875175476074, "eval_precision": 0.23690476190476187, "eval_recall": 0.3, "eval_runtime": 0.2221, "eval_samples_per_second": 90.039, "eval_steps_per_second": 13.506, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.961738586425781, "step": 425 }, { "epoch": 45.0, "learning_rate": 2.08e-06, "loss": 0.0129, "step": 450 }, { "epoch": 45.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.40324783325195, "eval_f1": 0.2511111111111112, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 41, "eval_loss": 3.392756700515747, "eval_precision": 0.2869047619047619, "eval_recall": 0.3, "eval_runtime": 0.2132, "eval_samples_per_second": 93.819, "eval_steps_per_second": 14.073, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.957637786865234, "step": 450 }, { "epoch": 47.5, "learning_rate": 1.08e-06, "loss": 0.0121, "step": 475 }, { "epoch": 47.5, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.403297424316406, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.8430633544921875, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 37, "eval_loss": 3.389730453491211, "eval_precision": 0.2369047619047619, "eval_recall": 0.3, "eval_runtime": 0.2122, "eval_samples_per_second": 94.239, "eval_steps_per_second": 14.136, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.942131042480469, "step": 475 }, { "epoch": 50.0, "learning_rate": 8e-08, "loss": 0.0124, "step": 500 }, { "epoch": 50.0, "eval_accuracy": 0.3, "eval_disk_space_total": 78.1898422241211, "eval_disk_space_used": 40.4033203125, "eval_f1": 0.23444444444444446, "eval_gpu_ram_allocated": 4.843059062957764, "eval_gpu_ram_cached": 7.046875, "eval_gpu_ram_total": 39.56402587890625, "eval_gpu_utilization": 38, "eval_loss": 3.3983218669891357, "eval_precision": 0.2369047619047619, "eval_recall": 0.3, "eval_runtime": 0.2214, "eval_samples_per_second": 90.351, "eval_steps_per_second": 13.553, "eval_system_ram_total": 83.48074722290039, "eval_system_ram_used": 4.958106994628906, "step": 500 } ], "max_steps": 500, "num_train_epochs": 50, "total_flos": 168180059293920.0, "trial_name": null, "trial_params": null }