End of training

Browse files

Files changed (9) hide show

README.md +146 -0
config.json +33 -0
generation_config.json +7 -0
logs/events.out.tfevents.1706093926.ESBeastModeOn.23992.0 +3 -0
model.safetensors +3 -0
special_tokens_map.json +23 -0
spiece.model +3 -0
tokenizer_config.json +38 -0
training_args.bin +3 -0

README.md ADDED Viewed

	@@ -0,0 +1,146 @@

+---
+license: apache-2.0
+base_model: google/mt5-base
+tags:
+- generated_from_trainer
+metrics:
+- rouge
+model-index:
+- name: mt5-translated-lithuanian-simplifier
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# mt5-translated-lithuanian-simplifier
+This model is a fine-tuned version of [google/mt5-base](https://huggingface.co/google/mt5-base) on an unspecified dataset (no dataset name was recorded by the Trainer).
+It achieves the following results on the evaluation set:
+- Loss: 0.0761
+- Rouge1: 0.7877
+- Rouge2: 0.6566
+- Rougel: 0.7845
+- Gen Len: 49.2293
+## Model description
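+More information needed. Judging by the model name and base checkpoint, this is presumably google/mt5-base fine-tuned for Lithuanian text simplification (to be confirmed by the author).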
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.0001
+- train_batch_size: 4
+- eval_batch_size: 4
+- seed: 42
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 500
+- num_epochs: 8
+### Training results
+| Training Loss | Epoch | Step  | Validation Loss | Rouge1 | Rouge2 | Rougel | Gen Len |
+|:-------------:|:-----:|:-----:|:---------------:|:------:|:------:|:------:|:-------:|
+| 23.9322       | 0.1   | 200   | 19.1649         | 0.016  | 0.0004 | 0.0146 | 512.0   |
+| 2.5416        | 0.19  | 400   | 1.4406          | 0.035  | 0.0002 | 0.0345 | 51.3394 |
+| 0.7449        | 0.29  | 600   | 0.7221          | 0.0021 | 0.0    | 0.0021 | 50.2293 |
+| 0.4405        | 0.38  | 800   | 0.2164          | 0.5491 | 0.3593 | 0.5367 | 49.4955 |
+| 0.177         | 0.48  | 1000  | 0.1672          | 0.6294 | 0.4636 | 0.6209 | 49.2293 |
+| 0.1838        | 0.57  | 1200  | 0.1561          | 0.6214 | 0.4375 | 0.613  | 49.2293 |
+| 0.1471        | 0.67  | 1400  | 0.1295          | 0.7071 | 0.5673 | 0.6998 | 49.2293 |
+| 0.1622        | 0.77  | 1600  | 0.1229          | 0.6929 | 0.5402 | 0.6858 | 49.2293 |
+| 0.1255        | 0.86  | 1800  | 0.1192          | 0.7044 | 0.5547 | 0.6978 | 49.2293 |
+| 0.1281        | 0.96  | 2000  | 0.1150          | 0.7169 | 0.5718 | 0.7103 | 49.2293 |
+| 0.1561        | 1.05  | 2200  | 0.1088          | 0.7165 | 0.5688 | 0.7108 | 49.2293 |
+| 0.145         | 1.15  | 2400  | 0.1064          | 0.7321 | 0.5921 | 0.7263 | 49.2293 |
+| 0.1207        | 1.25  | 2600  | 0.1030          | 0.7348 | 0.5957 | 0.7291 | 49.2293 |
+| 0.1151        | 1.34  | 2800  | 0.1014          | 0.7289 | 0.5859 | 0.7239 | 49.2293 |
+| 0.1001        | 1.44  | 3000  | 0.0983          | 0.7402 | 0.6003 | 0.7349 | 49.2293 |
+| 0.1354        | 1.53  | 3200  | 0.0963          | 0.738  | 0.598  | 0.7332 | 49.2293 |
+| 0.1092        | 1.63  | 3400  | 0.0978          | 0.7446 | 0.607  | 0.7394 | 49.2293 |
+| 0.1109        | 1.72  | 3600  | 0.0973          | 0.7427 | 0.6034 | 0.7377 | 49.2293 |
+| 0.1083        | 1.82  | 3800  | 0.0950          | 0.7479 | 0.6094 | 0.7432 | 49.2293 |
+| 0.1348        | 1.92  | 4000  | 0.0958          | 0.7498 | 0.6121 | 0.745  | 49.2293 |
+| 0.1004        | 2.01  | 4200  | 0.0898          | 0.7539 | 0.6152 | 0.7494 | 49.2293 |
+| 0.1131        | 2.11  | 4400  | 0.0925          | 0.753  | 0.6154 | 0.7488 | 49.2293 |
+| 0.1312        | 2.2   | 4600  | 0.0919          | 0.755  | 0.6183 | 0.7508 | 49.2293 |
+| 0.1139        | 2.3   | 4800  | 0.0908          | 0.756  | 0.6182 | 0.7518 | 49.2293 |
+| 0.1168        | 2.39  | 5000  | 0.0880          | 0.7574 | 0.6202 | 0.7533 | 49.2293 |
+| 0.0793        | 2.49  | 5200  | 0.0897          | 0.7575 | 0.6193 | 0.7531 | 49.2293 |
+| 0.0869        | 2.59  | 5400  | 0.0866          | 0.7605 | 0.6228 | 0.7564 | 49.2293 |
+| 0.1053        | 2.68  | 5600  | 0.0870          | 0.7594 | 0.6203 | 0.7551 | 49.2293 |
+| 0.0889        | 2.78  | 5800  | 0.0893          | 0.7609 | 0.6237 | 0.7568 | 49.2293 |
+| 0.0982        | 2.87  | 6000  | 0.0873          | 0.7637 | 0.6279 | 0.7599 | 49.2293 |
+| 0.0838        | 2.97  | 6200  | 0.0846          | 0.7665 | 0.6309 | 0.7626 | 49.2293 |
+| 0.0829        | 3.07  | 6400  | 0.0844          | 0.7665 | 0.6315 | 0.7629 | 49.2293 |
+| 0.068         | 3.16  | 6600  | 0.0836          | 0.7695 | 0.6358 | 0.7658 | 49.2293 |
+| 0.0747        | 3.26  | 6800  | 0.0848          | 0.7675 | 0.6322 | 0.7639 | 49.2293 |
+| 0.0792        | 3.35  | 7000  | 0.0840          | 0.7691 | 0.6342 | 0.7656 | 49.2293 |
+| 0.0739        | 3.45  | 7200  | 0.0820          | 0.7713 | 0.6365 | 0.7676 | 49.2293 |
+| 0.0793        | 3.54  | 7400  | 0.0813          | 0.7723 | 0.6374 | 0.7685 | 49.2293 |
+| 0.0908        | 3.64  | 7600  | 0.0819          | 0.7731 | 0.6388 | 0.7696 | 49.2293 |
+| 0.1125        | 3.74  | 7800  | 0.0811          | 0.774  | 0.6402 | 0.7705 | 49.2293 |
+| 0.1231        | 3.83  | 8000  | 0.0805          | 0.7736 | 0.6391 | 0.7699 | 49.2293 |
+| 0.0805        | 3.93  | 8200  | 0.0806          | 0.7736 | 0.6383 | 0.7698 | 49.2293 |
+| 0.0798        | 4.02  | 8400  | 0.0806          | 0.7758 | 0.6413 | 0.7726 | 49.2293 |
+| 0.061         | 4.12  | 8600  | 0.0807          | 0.7738 | 0.6391 | 0.7705 | 49.2293 |
+| 0.0636        | 4.21  | 8800  | 0.0810          | 0.7763 | 0.6424 | 0.7731 | 49.2293 |
+| 0.0813        | 4.31  | 9000  | 0.0798          | 0.7765 | 0.6418 | 0.7731 | 49.2293 |
+| 0.0664        | 4.41  | 9200  | 0.0804          | 0.7779 | 0.6441 | 0.7744 | 49.2293 |
+| 0.077         | 4.5   | 9400  | 0.0783          | 0.7775 | 0.6432 | 0.774  | 49.2293 |
+| 0.0769        | 4.6   | 9600  | 0.0788          | 0.7786 | 0.6446 | 0.7752 | 49.2293 |
+| 0.0874        | 4.69  | 9800  | 0.0796          | 0.7782 | 0.6455 | 0.7749 | 49.2293 |
+| 0.0682        | 4.79  | 10000 | 0.0784          | 0.7783 | 0.6452 | 0.7752 | 49.2293 |
+| 0.0649        | 4.89  | 10200 | 0.0781          | 0.7788 | 0.6453 | 0.7757 | 49.2293 |
+| 0.0594        | 4.98  | 10400 | 0.0791          | 0.7795 | 0.6468 | 0.7762 | 49.2293 |
+| 0.1001        | 5.08  | 10600 | 0.0775          | 0.7794 | 0.6464 | 0.7762 | 49.2293 |
+| 0.065         | 5.17  | 10800 | 0.0794          | 0.7794 | 0.6474 | 0.7762 | 49.2293 |
+| 0.0505        | 5.27  | 11000 | 0.0787          | 0.7809 | 0.6481 | 0.7775 | 49.2293 |
+| 0.0904        | 5.36  | 11200 | 0.0772          | 0.7825 | 0.6504 | 0.7793 | 49.2293 |
+| 0.0782        | 5.46  | 11400 | 0.0777          | 0.7835 | 0.651  | 0.7803 | 49.2293 |
+| 0.0758        | 5.56  | 11600 | 0.0774          | 0.7823 | 0.6505 | 0.7792 | 49.2293 |
+| 0.0685        | 5.65  | 11800 | 0.0778          | 0.7819 | 0.6498 | 0.7787 | 49.2293 |
+| 0.0664        | 5.75  | 12000 | 0.0774          | 0.7818 | 0.6493 | 0.7786 | 49.2293 |
+| 0.0841        | 5.84  | 12200 | 0.0770          | 0.7848 | 0.6527 | 0.7813 | 49.2293 |
+| 0.0867        | 5.94  | 12400 | 0.0765          | 0.7844 | 0.6522 | 0.7812 | 49.2293 |
+| 0.0572        | 6.03  | 12600 | 0.0772          | 0.7849 | 0.6522 | 0.7816 | 49.2293 |
+| 0.0554        | 6.13  | 12800 | 0.0775          | 0.7844 | 0.6526 | 0.7812 | 49.2293 |
+| 0.0725        | 6.23  | 13000 | 0.0774          | 0.7851 | 0.6534 | 0.7822 | 49.2293 |
+| 0.0952        | 6.32  | 13200 | 0.0778          | 0.7848 | 0.6527 | 0.7817 | 49.2293 |
+| 0.0795        | 6.42  | 13400 | 0.0764          | 0.7858 | 0.6542 | 0.7826 | 49.2293 |
+| 0.0682        | 6.51  | 13600 | 0.0772          | 0.7852 | 0.6527 | 0.7819 | 49.2293 |
+| 0.0483        | 6.61  | 13800 | 0.0777          | 0.785  | 0.6525 | 0.7815 | 49.2293 |
+| 0.0725        | 6.7   | 14000 | 0.0767          | 0.7864 | 0.6545 | 0.7831 | 49.2293 |
+| 0.0675        | 6.8   | 14200 | 0.0773          | 0.786  | 0.6551 | 0.7827 | 49.2293 |
+| 0.0706        | 6.9   | 14400 | 0.0758          | 0.7867 | 0.6556 | 0.7837 | 49.2293 |
+| 0.0785        | 6.99  | 14600 | 0.0772          | 0.7866 | 0.6559 | 0.7835 | 49.2293 |
+| 0.0796        | 7.09  | 14800 | 0.0763          | 0.7872 | 0.6564 | 0.7841 | 49.2293 |
+| 0.0761        | 7.18  | 15000 | 0.0757          | 0.7879 | 0.6566 | 0.7848 | 49.2293 |
+| 0.0598        | 7.28  | 15200 | 0.0758          | 0.788  | 0.6568 | 0.7849 | 49.2293 |
+| 0.0587        | 7.38  | 15400 | 0.0768          | 0.7872 | 0.6556 | 0.7839 | 49.2293 |
+| 0.0859        | 7.47  | 15600 | 0.0765          | 0.7875 | 0.6559 | 0.7842 | 49.2293 |
+| 0.061         | 7.57  | 15800 | 0.0764          | 0.7876 | 0.6564 | 0.7845 | 49.2293 |
+| 0.0718        | 7.66  | 16000 | 0.0764          | 0.7871 | 0.6558 | 0.784  | 49.2293 |
+| 0.0695        | 7.76  | 16200 | 0.0763          | 0.7873 | 0.656  | 0.7842 | 49.2293 |
+| 0.0678        | 7.85  | 16400 | 0.0762          | 0.7875 | 0.6565 | 0.7844 | 49.2293 |
+| 0.0751        | 7.95  | 16600 | 0.0761          | 0.7877 | 0.6566 | 0.7845 | 49.2293 |
+### Framework versions
+- Transformers 4.36.2
+- Pytorch 2.1.1
+- Datasets 2.16.1
+- Tokenizers 0.15.0

config.json ADDED Viewed

	@@ -0,0 +1,33 @@

+{
+  "_name_or_path": "google/mt5-base",
+  "architectures": [
+    "T5ForConditionalGeneration"
+  ],
+  "classifier_dropout": 0.0,
+  "d_ff": 2048,
+  "d_kv": 64,
+  "d_model": 768,
+  "decoder_start_token_id": 0,
+  "dense_act_fn": "gelu_new",
+  "dropout_rate": 0.1,
+  "eos_token_id": 1,
+  "feed_forward_proj": "gated-gelu",
+  "initializer_factor": 1.0,
+  "is_encoder_decoder": true,
+  "is_gated_act": true,
+  "layer_norm_epsilon": 1e-06,
+  "model_type": "t5",
+  "num_decoder_layers": 12,
+  "num_heads": 12,
+  "num_layers": 12,
+  "output_past": true,
+  "pad_token_id": 0,
+  "relative_attention_max_distance": 128,
+  "relative_attention_num_buckets": 32,
+  "tie_word_embeddings": false,
+  "tokenizer_class": "T5Tokenizer",
+  "torch_dtype": "float32",
+  "transformers_version": "4.36.2",
+  "use_cache": true,
+  "vocab_size": 250112
+}

generation_config.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+  "_from_model_config": true,
+  "decoder_start_token_id": 0,
+  "eos_token_id": 1,
+  "pad_token_id": 0,
+  "transformers_version": "4.36.2"
+}

logs/events.out.tfevents.1706093926.ESBeastModeOn.23992.0 ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:be6a1fde886999a4e0866d47ce162519d5b3e9be5b2f2c7b75a9cc12ad6bd325
+size 306405

model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:c7f93b8ff6eeea2265910a88603f0fed2e4e9cf2d16fbaf2188b0365bcb4ee60
+size 2329638768

special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,23 @@

+{
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": false,
+    "rstrip": false,
+    "single_word": false
+  }
+}

spiece.model ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:ef78f86560d809067d12bac6c09f19a462cb3af3f54d2b8acbba26e1433125d6
+size 4309802

tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,38 @@

+{
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": false,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "additional_special_tokens": [],
+  "clean_up_tokenization_spaces": true,
+  "eos_token": "</s>",
+  "extra_ids": 0,
+  "legacy": true,
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sp_model_kwargs": {},
+  "tokenizer_class": "T5Tokenizer",
+  "unk_token": "<unk>"
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:79116439cccaae422d028f86eb2a0a7ce5026cbccf411c2b323010e359fbfac6
+size 4792