```
{
  "cache_dir": "/leonardo_work/EUHPC_E03_068/.cache",
  "method": "orpo",
  "dataset": "autoredteam",
  "model": "bigcode/starcoder2-3b",
  "tokenizer": "TechxGenus/starcoder2-7b-instruct",
  "lr": 0.01,
  "train_batch_size": 5,
  "eval_batch_size": 5,
  "num_epochs": 3,
  "seed": 42,
  "eval_only": false,
  "evaluation_size": 2000,
  "checkpoint_path": null,
  "experiment_name": "starcoder2_3b-autoredteam_v2-train",
  "experiment_group": "results",
  "reference_model": null,
  "context_length": 1024,
  "train_summarization": "",
  "dpo_beta": 0.1,
  "orpo_beta": 0.1,
  "kl_coef": 0.0,
  "reward_model": "",
  "bestofn_size": 4,
  "train_reward_model": ""
}
```