{ "best_metric": 0.11153655499219894, "best_model_checkpoint": "deepfake_detection/checkpoint-35710", "epoch": 10.0, "eval_steps": 500, "global_step": 35710, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.14001680201624195, "grad_norm": 1.0528628826141357, "learning_rate": 9.873808188446438e-07, "loss": 0.6555, "step": 500 }, { "epoch": 0.2800336040324839, "grad_norm": 1.7406058311462402, "learning_rate": 9.733595064498036e-07, "loss": 0.4817, "step": 1000 }, { "epoch": 0.42005040604872584, "grad_norm": 2.337244987487793, "learning_rate": 9.593381940549635e-07, "loss": 0.3393, "step": 1500 }, { "epoch": 0.5600672080649678, "grad_norm": 2.840176582336426, "learning_rate": 9.453168816601234e-07, "loss": 0.2739, "step": 2000 }, { "epoch": 0.7000840100812098, "grad_norm": 2.7442541122436523, "learning_rate": 9.312955692652832e-07, "loss": 0.2335, "step": 2500 }, { "epoch": 0.8401008120974517, "grad_norm": 4.048594951629639, "learning_rate": 9.17274256870443e-07, "loss": 0.2087, "step": 3000 }, { "epoch": 0.9801176141136937, "grad_norm": 2.741589069366455, "learning_rate": 9.032529444756028e-07, "loss": 0.1953, "step": 3500 }, { "epoch": 1.0, "eval_accuracy": 0.9271411877470096, "eval_loss": 0.277784526348114, "eval_model_preparation_time": 0.003, "eval_runtime": 461.9798, "eval_samples_per_second": 164.858, "eval_steps_per_second": 20.609, "step": 3571 }, { "epoch": 1.1201344161299356, "grad_norm": 2.69423246383667, "learning_rate": 8.892316320807627e-07, "loss": 0.1865, "step": 4000 }, { "epoch": 1.2601512181461776, "grad_norm": 3.063028335571289, "learning_rate": 8.752103196859225e-07, "loss": 0.1789, "step": 4500 }, { "epoch": 1.4001680201624196, "grad_norm": 2.7112696170806885, "learning_rate": 8.611890072910825e-07, "loss": 0.1587, "step": 5000 }, { "epoch": 1.5401848221786616, "grad_norm": 1.334972858428955, "learning_rate": 8.471676948962423e-07, "loss": 0.1654, "step": 5500 }, { "epoch": 1.6802016241949034, "grad_norm": 4.083142280578613, "learning_rate": 8.331463825014021e-07, "loss": 0.1472, "step": 6000 }, { "epoch": 1.8202184262111454, "grad_norm": 3.0988452434539795, "learning_rate": 8.191250701065619e-07, "loss": 0.1454, "step": 6500 }, { "epoch": 1.9602352282273872, "grad_norm": 4.7256011962890625, "learning_rate": 8.051037577117218e-07, "loss": 0.137, "step": 7000 }, { "epoch": 2.0, "eval_accuracy": 0.9447748847835507, "eval_loss": 0.19382010400295258, "eval_model_preparation_time": 0.003, "eval_runtime": 446.387, "eval_samples_per_second": 170.617, "eval_steps_per_second": 21.329, "step": 7142 }, { "epoch": 2.100252030243629, "grad_norm": 2.3897340297698975, "learning_rate": 7.910824453168816e-07, "loss": 0.1272, "step": 7500 }, { "epoch": 2.240268832259871, "grad_norm": 7.572381019592285, "learning_rate": 7.770611329220415e-07, "loss": 0.1339, "step": 8000 }, { "epoch": 2.380285634276113, "grad_norm": 2.5557875633239746, "learning_rate": 7.630398205272013e-07, "loss": 0.1352, "step": 8500 }, { "epoch": 2.520302436292355, "grad_norm": 2.867504119873047, "learning_rate": 7.490185081323611e-07, "loss": 0.1312, "step": 9000 }, { "epoch": 2.660319238308597, "grad_norm": 5.774721622467041, "learning_rate": 7.34997195737521e-07, "loss": 0.1263, "step": 9500 }, { "epoch": 2.800336040324839, "grad_norm": 1.3829355239868164, "learning_rate": 7.209758833426809e-07, "loss": 0.1222, "step": 10000 }, { "epoch": 2.940352842341081, "grad_norm": 3.2408864498138428, "learning_rate": 7.069545709478407e-07, "loss": 0.1238, "step": 10500 }, { "epoch": 3.0, "eval_accuracy": 0.9503420385761742, "eval_loss": 0.16304399073123932, "eval_model_preparation_time": 0.003, "eval_runtime": 445.7116, "eval_samples_per_second": 170.875, "eval_steps_per_second": 21.361, "step": 10713 }, { "epoch": 3.0803696443573227, "grad_norm": 5.649173259735107, "learning_rate": 6.929332585530005e-07, "loss": 0.1163, "step": 11000 }, { "epoch": 3.2203864463735647, "grad_norm": 6.677682876586914, "learning_rate": 6.789119461581604e-07, "loss": 0.1151, "step": 11500 }, { "epoch": 3.3604032483898068, "grad_norm": 6.577985763549805, "learning_rate": 6.648906337633202e-07, "loss": 0.116, "step": 12000 }, { "epoch": 3.5004200504060488, "grad_norm": 4.521468162536621, "learning_rate": 6.508693213684801e-07, "loss": 0.1166, "step": 12500 }, { "epoch": 3.6404368524222908, "grad_norm": 0.37339428067207336, "learning_rate": 6.368480089736399e-07, "loss": 0.1063, "step": 13000 }, { "epoch": 3.7804536544385328, "grad_norm": 2.2542712688446045, "learning_rate": 6.228266965787997e-07, "loss": 0.108, "step": 13500 }, { "epoch": 3.9204704564547743, "grad_norm": 11.904646873474121, "learning_rate": 6.088053841839595e-07, "loss": 0.1094, "step": 14000 }, { "epoch": 4.0, "eval_accuracy": 0.9547406152755348, "eval_loss": 0.14307229220867157, "eval_model_preparation_time": 0.003, "eval_runtime": 473.4476, "eval_samples_per_second": 160.865, "eval_steps_per_second": 20.11, "step": 14284 }, { "epoch": 4.060487258471016, "grad_norm": 1.572487711906433, "learning_rate": 5.947840717891194e-07, "loss": 0.1105, "step": 14500 }, { "epoch": 4.200504060487258, "grad_norm": 2.691279888153076, "learning_rate": 5.807627593942794e-07, "loss": 0.102, "step": 15000 }, { "epoch": 4.3405208625035, "grad_norm": 3.9856207370758057, "learning_rate": 5.667414469994392e-07, "loss": 0.1032, "step": 15500 }, { "epoch": 4.480537664519742, "grad_norm": 1.828147292137146, "learning_rate": 5.52720134604599e-07, "loss": 0.1029, "step": 16000 }, { "epoch": 4.620554466535984, "grad_norm": 0.4480103850364685, "learning_rate": 5.386988222097588e-07, "loss": 0.1024, "step": 16500 }, { "epoch": 4.760571268552226, "grad_norm": 9.204968452453613, "learning_rate": 5.246775098149186e-07, "loss": 0.0991, "step": 17000 }, { "epoch": 4.900588070568468, "grad_norm": 7.890961170196533, "learning_rate": 5.106561974200784e-07, "loss": 0.1056, "step": 17500 }, { "epoch": 5.0, "eval_accuracy": 0.9586927692651095, "eval_loss": 0.12978993356227875, "eval_model_preparation_time": 0.003, "eval_runtime": 437.804, "eval_samples_per_second": 173.961, "eval_steps_per_second": 21.747, "step": 17855 }, { "epoch": 5.04060487258471, "grad_norm": 2.9656715393066406, "learning_rate": 4.966348850252384e-07, "loss": 0.0981, "step": 18000 }, { "epoch": 5.180621674600952, "grad_norm": 1.717795968055725, "learning_rate": 4.826135726303982e-07, "loss": 0.1049, "step": 18500 }, { "epoch": 5.320638476617194, "grad_norm": 4.466497421264648, "learning_rate": 4.6859226023555804e-07, "loss": 0.1009, "step": 19000 }, { "epoch": 5.460655278633436, "grad_norm": 2.382636547088623, "learning_rate": 4.5457094784071786e-07, "loss": 0.097, "step": 19500 }, { "epoch": 5.600672080649678, "grad_norm": 3.537141799926758, "learning_rate": 4.405496354458777e-07, "loss": 0.0947, "step": 20000 }, { "epoch": 5.7406888826659195, "grad_norm": 3.2444217205047607, "learning_rate": 4.2652832305103755e-07, "loss": 0.096, "step": 20500 }, { "epoch": 5.8807056846821615, "grad_norm": 6.093824863433838, "learning_rate": 4.125070106561974e-07, "loss": 0.0962, "step": 21000 }, { "epoch": 6.0, "eval_accuracy": 0.9606754112997465, "eval_loss": 0.1219368726015091, "eval_model_preparation_time": 0.003, "eval_runtime": 461.6477, "eval_samples_per_second": 164.976, "eval_steps_per_second": 20.624, "step": 21426 }, { "epoch": 6.0207224866984035, "grad_norm": 6.273184299468994, "learning_rate": 3.9848569826135723e-07, "loss": 0.1001, "step": 21500 }, { "epoch": 6.1607392887146455, "grad_norm": 1.6552726030349731, "learning_rate": 3.844643858665171e-07, "loss": 0.0935, "step": 22000 }, { "epoch": 6.3007560907308875, "grad_norm": 1.253029465675354, "learning_rate": 3.704430734716769e-07, "loss": 0.0944, "step": 22500 }, { "epoch": 6.4407728927471295, "grad_norm": 6.506760120391846, "learning_rate": 3.564217610768368e-07, "loss": 0.092, "step": 23000 }, { "epoch": 6.5807896947633715, "grad_norm": 6.743386268615723, "learning_rate": 3.4240044868199666e-07, "loss": 0.0955, "step": 23500 }, { "epoch": 6.7208064967796135, "grad_norm": 7.667580604553223, "learning_rate": 3.2837913628715647e-07, "loss": 0.0923, "step": 24000 }, { "epoch": 6.8608232987958555, "grad_norm": 3.372116804122925, "learning_rate": 3.143578238923163e-07, "loss": 0.0992, "step": 24500 }, { "epoch": 7.0, "eval_accuracy": 0.9620934599073017, "eval_loss": 0.11666399985551834, "eval_model_preparation_time": 0.003, "eval_runtime": 447.5043, "eval_samples_per_second": 170.191, "eval_steps_per_second": 21.276, "step": 24997 }, { "epoch": 7.0008401008120975, "grad_norm": 0.35922595858573914, "learning_rate": 3.003365114974761e-07, "loss": 0.0889, "step": 25000 }, { "epoch": 7.1408569028283395, "grad_norm": 0.6694265007972717, "learning_rate": 2.8631519910263603e-07, "loss": 0.0904, "step": 25500 }, { "epoch": 7.2808737048445815, "grad_norm": 0.41824430227279663, "learning_rate": 2.7229388670779584e-07, "loss": 0.0934, "step": 26000 }, { "epoch": 7.4208905068608235, "grad_norm": 6.300509929656982, "learning_rate": 2.5827257431295566e-07, "loss": 0.0957, "step": 26500 }, { "epoch": 7.5609073088770655, "grad_norm": 3.323270559310913, "learning_rate": 2.4425126191811553e-07, "loss": 0.0879, "step": 27000 }, { "epoch": 7.7009241108933075, "grad_norm": 3.265133857727051, "learning_rate": 2.3022994952327537e-07, "loss": 0.091, "step": 27500 }, { "epoch": 7.8409409129095495, "grad_norm": 9.813462257385254, "learning_rate": 2.1620863712843522e-07, "loss": 0.0895, "step": 28000 }, { "epoch": 7.9809577149257915, "grad_norm": 0.7600739002227783, "learning_rate": 2.0218732473359506e-07, "loss": 0.0911, "step": 28500 }, { "epoch": 8.0, "eval_accuracy": 0.9630519557253713, "eval_loss": 0.1135854721069336, "eval_model_preparation_time": 0.003, "eval_runtime": 439.5121, "eval_samples_per_second": 173.285, "eval_steps_per_second": 21.663, "step": 28568 }, { "epoch": 8.120974516942033, "grad_norm": 6.328824043273926, "learning_rate": 1.881660123387549e-07, "loss": 0.0875, "step": 29000 }, { "epoch": 8.260991318958276, "grad_norm": 16.23442840576172, "learning_rate": 1.7414469994391472e-07, "loss": 0.089, "step": 29500 }, { "epoch": 8.401008120974517, "grad_norm": 7.651858329772949, "learning_rate": 1.601233875490746e-07, "loss": 0.0917, "step": 30000 }, { "epoch": 8.54102492299076, "grad_norm": 2.9040281772613525, "learning_rate": 1.4610207515423443e-07, "loss": 0.0857, "step": 30500 }, { "epoch": 8.681041725007, "grad_norm": 6.958981990814209, "learning_rate": 1.3208076275939427e-07, "loss": 0.09, "step": 31000 }, { "epoch": 8.821058527023244, "grad_norm": 4.84717321395874, "learning_rate": 1.1805945036455412e-07, "loss": 0.0917, "step": 31500 }, { "epoch": 8.961075329039485, "grad_norm": 6.962361812591553, "learning_rate": 1.0403813796971396e-07, "loss": 0.0889, "step": 32000 }, { "epoch": 9.0, "eval_accuracy": 0.9630256955659721, "eval_loss": 0.11266375333070755, "eval_model_preparation_time": 0.003, "eval_runtime": 440.431, "eval_samples_per_second": 172.924, "eval_steps_per_second": 21.617, "step": 32139 }, { "epoch": 9.101092131055728, "grad_norm": 13.205183982849121, "learning_rate": 9.00168255748738e-08, "loss": 0.09, "step": 32500 }, { "epoch": 9.241108933071969, "grad_norm": 0.3649824261665344, "learning_rate": 7.599551318003366e-08, "loss": 0.0818, "step": 33000 }, { "epoch": 9.38112573508821, "grad_norm": 1.0988820791244507, "learning_rate": 6.197420078519349e-08, "loss": 0.0847, "step": 33500 }, { "epoch": 9.521142537104453, "grad_norm": 10.086563110351562, "learning_rate": 4.795288839035334e-08, "loss": 0.0909, "step": 34000 }, { "epoch": 9.661159339120694, "grad_norm": 1.5432101488113403, "learning_rate": 3.3931575995513173e-08, "loss": 0.0873, "step": 34500 }, { "epoch": 9.801176141136937, "grad_norm": 5.793679714202881, "learning_rate": 1.9910263600673023e-08, "loss": 0.0847, "step": 35000 }, { "epoch": 9.941192943153178, "grad_norm": 1.821306824684143, "learning_rate": 5.888951205832866e-09, "loss": 0.0862, "step": 35500 }, { "epoch": 10.0, "eval_accuracy": 0.9634327280366591, "eval_loss": 0.11153655499219894, "eval_model_preparation_time": 0.003, "eval_runtime": 439.1936, "eval_samples_per_second": 173.411, "eval_steps_per_second": 21.678, "step": 35710 } ], "logging_steps": 500, "max_steps": 35710, "num_input_tokens_seen": 0, "num_train_epochs": 10, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 8.852762385560602e+19, "train_batch_size": 32, "trial_name": null, "trial_params": null }