|
```
import transformers

# LoRA hyper-parameters
LORA_R = 8
LORA_ALPHA = 16
LORA_DROPOUT = 0
LORA_TARGET_MODULES = [
    "q_proj",
    "v_proj",
]

# Training hyper-parameters
BATCH_SIZE = 128
MICRO_BATCH_SIZE = 4
GRADIENT_ACCUMULATION_STEPS = BATCH_SIZE // MICRO_BATCH_SIZE
LEARNING_RATE = 3e-4
TRAIN_STEPS = 300

training_arguments = transformers.TrainingArguments(
    output_dir="experiments",  # required; where checkpoints and logs are written
    per_device_train_batch_size=MICRO_BATCH_SIZE,
    gradient_accumulation_steps=GRADIENT_ACCUMULATION_STEPS,
    warmup_steps=100,
    max_steps=TRAIN_STEPS,
    learning_rate=LEARNING_RATE,
    fp16=True,  # mixed-precision training
    logging_steps=10,
    optim="adamw_torch",
    evaluation_strategy="steps",
    save_strategy="steps",
    eval_steps=50,
    save_steps=50,
    save_total_limit=3,  # keep only the three most recent checkpoints
    load_best_model_at_end=True,
)
```
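
The LoRA constants above are consumed by `peft`'s `LoraConfig`, and the `training_arguments` object is passed to a `Trainer`. The sketch below shows one way to wire the two together; it assumes a causal LM already loaded as `model`, plus `tokenizer`, `train_data`, and `val_data` from earlier steps (those names are assumptions for illustration, not part of this section).

```
# Minimal sketch, assuming `model`, `tokenizer`, `train_data`, and `val_data`
# were created in earlier steps of the tutorial.
from peft import LoraConfig, get_peft_model

lora_config = LoraConfig(
    r=LORA_R,                            # rank of the low-rank update matrices
    lora_alpha=LORA_ALPHA,               # scaling factor for the update (alpha / r)
    target_modules=LORA_TARGET_MODULES,  # attention projections to adapt
    lora_dropout=LORA_DROPOUT,
    bias="none",
    task_type="CAUSAL_LM",
)
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()  # only the adapter weights are trainable

trainer = transformers.Trainer(
    model=model,
    train_dataset=train_data,
    eval_dataset=val_data,
    args=training_arguments,
    data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False),
)
trainer.train()
```

Note that the effective batch size is still `BATCH_SIZE` (128): each optimizer step accumulates gradients over `BATCH_SIZE // MICRO_BATCH_SIZE` = 32 forward passes of `MICRO_BATCH_SIZE` = 4 examples, which keeps per-device memory low.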