---
datasets:
- pszemraj/scientific_lay_summarisation-plos-norm
language:
- en
metrics:
- bleu
- rouge
pipeline_tag: summarization
---
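
# Usage

A minimal inference sketch for the `summarization` pipeline tag declared above. The Hub id below is a placeholder, since this card does not name the final checkpoint; substitute this model's actual repo id.

```python
from transformers import pipeline

# "user/model-id" is a placeholder -- replace with this model's Hub repo id.
summarizer = pipeline("summarization", model="user/model-id")

article = "Long scientific article text goes here..."
print(summarizer(article, max_length=256, truncation=True)[0]["summary_text"])
```
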
# Hyperparameters

```
learning_rate=2e-5
per_device_train_batch_size=14
per_device_eval_batch_size=14
weight_decay=0.01
save_total_limit=3
num_train_epochs=3
predict_with_generate=True
fp16=True
```
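
These names match the `Seq2SeqTrainingArguments` API from Hugging Face Transformers (`predict_with_generate` exists only there), so a plausible reconstruction of the training setup looks like the sketch below. The base checkpoint, dataset column names, and sequence lengths are assumptions; the card does not state them.

```python
from datasets import load_dataset
from transformers import (
    AutoModelForSeq2SeqLM,
    AutoTokenizer,
    DataCollatorForSeq2Seq,
    Seq2SeqTrainer,
    Seq2SeqTrainingArguments,
)

# Assumption: the card does not name the base model; any seq2seq
# checkpoint (e.g. a BART variant) fits this sketch.
checkpoint = "facebook/bart-base"
tokenizer = AutoTokenizer.from_pretrained(checkpoint)
model = AutoModelForSeq2SeqLM.from_pretrained(checkpoint)

dataset = load_dataset("pszemraj/scientific_lay_summarisation-plos-norm")

def preprocess(batch):
    # Assumption: "article" holds the source text and "summary" the target.
    model_inputs = tokenizer(batch["article"], max_length=1024, truncation=True)
    labels = tokenizer(text_target=batch["summary"], max_length=256, truncation=True)
    model_inputs["labels"] = labels["input_ids"]
    return model_inputs

tokenized = dataset.map(
    preprocess, batched=True, remove_columns=dataset["train"].column_names
)

# The hyperparameters listed above, passed where they belong.
training_args = Seq2SeqTrainingArguments(
    output_dir="lay-summarization",
    learning_rate=2e-5,
    per_device_train_batch_size=14,
    per_device_eval_batch_size=14,
    weight_decay=0.01,
    save_total_limit=3,
    num_train_epochs=3,
    predict_with_generate=True,
    fp16=True,
)

trainer = Seq2SeqTrainer(
    model=model,
    args=training_args,
    train_dataset=tokenized["train"],
    eval_dataset=tokenized["validation"],
    tokenizer=tokenizer,
    data_collator=DataCollatorForSeq2Seq(tokenizer, model=model),
)
```
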
# Training Output

```
global_step=4248,
training_loss=2.4160910424988598,
metrics={'train_runtime': 14565.4519,
         'train_samples_per_second': 4.082,
         'train_steps_per_second': 0.292,
         'total_flos': 1.7179021728232243e+17,
         'train_loss': 2.4160910424988598,
         'epoch': 3.0}
```
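
This is the `TrainOutput` returned by `trainer.train()`; continuing the sketch above, it would be produced and persisted roughly as follows.

```python
# trainer.train() returns the TrainOutput shown in this section.
train_result = trainer.train()

trainer.save_model()
trainer.log_metrics("train", train_result.metrics)
trainer.save_metrics("train", train_result.metrics)
```
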
# Training Results

| Epoch | Training Loss | Validation Loss | Rouge1 | Rouge2 | RougeL | RougeLsum | Bleu | Gen Len |
|:------|:--------------|:----------------|:-------|:-------|:-------|:----------|:-----|:--------|
| 1 | 2.467100 | 2.303269 | 0.410900 | 0.136200 | 0.235900 | 0.235900 | 0.465700 | 182.332800 |
| 2 | 2.386700 | 2.281062 | 0.426300 | 0.142300 | 0.246800 | 0.246700 | 0.525200 | 143.990900 |
| 3 | 2.362000 | 2.274931 | 0.428400 | 0.143800 | 0.248300 | 0.248200 | 0.532000 | 139.585900 |
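
The ROUGE, BLEU, and generation-length columns above are consistent with a `compute_metrics` hook passed to the `Seq2SeqTrainer` sketched earlier. The version below, built on the `evaluate` library, is an assumption about how these scores were produced, not something this card states; it reuses the `tokenizer` from the training sketch.

```python
import numpy as np
import evaluate

rouge = evaluate.load("rouge")
bleu = evaluate.load("bleu")

def compute_metrics(eval_preds):
    preds, labels = eval_preds
    # Labels are padded with -100 for the loss; restore pad tokens before decoding.
    labels = np.where(labels != -100, labels, tokenizer.pad_token_id)
    decoded_preds = tokenizer.batch_decode(preds, skip_special_tokens=True)
    decoded_labels = tokenizer.batch_decode(labels, skip_special_tokens=True)

    # rouge1 / rouge2 / rougeL / rougeLsum, each in [0, 1].
    result = rouge.compute(predictions=decoded_preds, references=decoded_labels)
    # BLEU expects one list of reference strings per prediction.
    result["bleu"] = bleu.compute(
        predictions=decoded_preds,
        references=[[ref] for ref in decoded_labels],
    )["bleu"]
    # "Gen Len": average generated length in non-pad tokens.
    result["gen_len"] = np.mean(
        [np.count_nonzero(pred != tokenizer.pad_token_id) for pred in preds]
    )
    return {k: round(float(v), 6) for k, v in result.items()}
```

Passing this as `compute_metrics=compute_metrics` when constructing the `Seq2SeqTrainer` (together with `predict_with_generate=True`) would make the trainer log these columns at each evaluation.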