{ "best_metric": 0.0011106810998171568, "best_model_checkpoint": "Result/OffsetTwo_batch16_step2000\\checkpoint-600", "epoch": 28.571428571428573, "global_step": 600, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.48, "learning_rate": 0.000199, "loss": 0.6712, "step": 10 }, { "epoch": 0.95, "learning_rate": 0.00019800000000000002, "loss": 0.7073, "step": 20 }, { "epoch": 1.43, "learning_rate": 0.00019700000000000002, "loss": 0.3994, "step": 30 }, { "epoch": 1.9, "learning_rate": 0.000196, "loss": 0.5316, "step": 40 }, { "epoch": 2.38, "learning_rate": 0.000195, "loss": 0.6121, "step": 50 }, { "epoch": 2.86, "learning_rate": 0.000194, "loss": 0.4975, "step": 60 }, { "epoch": 3.33, "learning_rate": 0.000193, "loss": 0.5137, "step": 70 }, { "epoch": 3.81, "learning_rate": 0.000192, "loss": 0.2739, "step": 80 }, { "epoch": 4.29, "learning_rate": 0.000191, "loss": 0.6669, "step": 90 }, { "epoch": 4.76, "learning_rate": 0.00019, "loss": 0.3004, "step": 100 }, { "epoch": 5.24, "learning_rate": 0.00018899999999999999, "loss": 0.1061, "step": 110 }, { "epoch": 5.71, "learning_rate": 0.000188, "loss": 0.2727, "step": 120 }, { "epoch": 6.19, "learning_rate": 0.00018700000000000002, "loss": 0.0707, "step": 130 }, { "epoch": 6.67, "learning_rate": 0.00018600000000000002, "loss": 0.2655, "step": 140 }, { "epoch": 7.14, "learning_rate": 0.00018500000000000002, "loss": 0.1479, "step": 150 }, { "epoch": 7.62, "learning_rate": 0.00018400000000000003, "loss": 0.1231, "step": 160 }, { "epoch": 8.1, "learning_rate": 0.000183, "loss": 0.0183, "step": 170 }, { "epoch": 8.57, "learning_rate": 0.000182, "loss": 0.0423, "step": 180 }, { "epoch": 9.05, "learning_rate": 0.000181, "loss": 0.057, "step": 190 }, { "epoch": 9.52, "learning_rate": 0.00018, "loss": 0.0082, "step": 200 }, { "epoch": 9.52, "eval_accuracy": 1.0, "eval_f1": 1.0, "eval_loss": 0.006428680382668972, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 0.738, "eval_samples_per_second": 75.879, "eval_steps_per_second": 9.485, "step": 200 }, { "epoch": 10.0, "learning_rate": 0.00017900000000000001, "loss": 0.0118, "step": 210 }, { "epoch": 10.48, "learning_rate": 0.00017800000000000002, "loss": 0.007, "step": 220 }, { "epoch": 10.95, "learning_rate": 0.00017700000000000002, "loss": 0.0056, "step": 230 }, { "epoch": 11.43, "learning_rate": 0.00017600000000000002, "loss": 0.0048, "step": 240 }, { "epoch": 11.9, "learning_rate": 0.000175, "loss": 0.0045, "step": 250 }, { "epoch": 12.38, "learning_rate": 0.000174, "loss": 0.0042, "step": 260 }, { "epoch": 12.86, "learning_rate": 0.000173, "loss": 0.004, "step": 270 }, { "epoch": 13.33, "learning_rate": 0.000172, "loss": 0.0036, "step": 280 }, { "epoch": 13.81, "learning_rate": 0.000171, "loss": 0.0034, "step": 290 }, { "epoch": 14.29, "learning_rate": 0.00017, "loss": 0.0032, "step": 300 }, { "epoch": 14.76, "learning_rate": 0.00016900000000000002, "loss": 0.0031, "step": 310 }, { "epoch": 15.24, "learning_rate": 0.000168, "loss": 0.0029, "step": 320 }, { "epoch": 15.71, "learning_rate": 0.000167, "loss": 0.0028, "step": 330 }, { "epoch": 16.19, "learning_rate": 0.000166, "loss": 0.0027, "step": 340 }, { "epoch": 16.67, "learning_rate": 0.000165, "loss": 0.0025, "step": 350 }, { "epoch": 17.14, "learning_rate": 0.000164, "loss": 0.0025, "step": 360 }, { "epoch": 17.62, "learning_rate": 0.000163, "loss": 0.0024, "step": 370 }, { "epoch": 18.1, "learning_rate": 0.000162, "loss": 0.0023, "step": 380 }, { "epoch": 18.57, "learning_rate": 0.000161, "loss": 0.0021, "step": 390 }, { "epoch": 19.05, "learning_rate": 0.00016, "loss": 0.0021, "step": 400 }, { "epoch": 19.05, "eval_accuracy": 1.0, "eval_f1": 1.0, "eval_loss": 0.0019897979218512774, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 0.713, "eval_samples_per_second": 78.541, "eval_steps_per_second": 9.818, "step": 400 }, { "epoch": 19.52, "learning_rate": 0.00015900000000000002, "loss": 0.002, "step": 410 }, { "epoch": 20.0, "learning_rate": 0.00015800000000000002, "loss": 0.0019, "step": 420 }, { "epoch": 20.48, "learning_rate": 0.00015700000000000002, "loss": 0.0019, "step": 430 }, { "epoch": 20.95, "learning_rate": 0.00015600000000000002, "loss": 0.0018, "step": 440 }, { "epoch": 21.43, "learning_rate": 0.000155, "loss": 0.0017, "step": 450 }, { "epoch": 21.9, "learning_rate": 0.000154, "loss": 0.0017, "step": 460 }, { "epoch": 22.38, "learning_rate": 0.000153, "loss": 0.0017, "step": 470 }, { "epoch": 22.86, "learning_rate": 0.000152, "loss": 0.0016, "step": 480 }, { "epoch": 23.33, "learning_rate": 0.000151, "loss": 0.0015, "step": 490 }, { "epoch": 23.81, "learning_rate": 0.00015000000000000001, "loss": 0.0015, "step": 500 }, { "epoch": 24.29, "learning_rate": 0.00014900000000000002, "loss": 0.0014, "step": 510 }, { "epoch": 24.76, "learning_rate": 0.000148, "loss": 0.0014, "step": 520 }, { "epoch": 25.24, "learning_rate": 0.000147, "loss": 0.0014, "step": 530 }, { "epoch": 25.71, "learning_rate": 0.000146, "loss": 0.0013, "step": 540 }, { "epoch": 26.19, "learning_rate": 0.000145, "loss": 0.0013, "step": 550 }, { "epoch": 26.67, "learning_rate": 0.000144, "loss": 0.0013, "step": 560 }, { "epoch": 27.14, "learning_rate": 0.000143, "loss": 0.0012, "step": 570 }, { "epoch": 27.62, "learning_rate": 0.000142, "loss": 0.0012, "step": 580 }, { "epoch": 28.1, "learning_rate": 0.000141, "loss": 0.0012, "step": 590 }, { "epoch": 28.57, "learning_rate": 0.00014, "loss": 0.0012, "step": 600 }, { "epoch": 28.57, "eval_accuracy": 1.0, "eval_f1": 1.0, "eval_loss": 0.0011106810998171568, "eval_precision": 1.0, "eval_recall": 1.0, "eval_runtime": 0.738, "eval_samples_per_second": 75.881, "eval_steps_per_second": 9.485, "step": 600 } ], "max_steps": 2000, "num_train_epochs": 96, "total_flos": 7.287346703356232e+17, "trial_name": null, "trial_params": null }