End of training

Browse files

Files changed (8) hide show

README.md +57 -3
adapter_config.json +34 -0
adapter_model.safetensors +3 -0
runs/Jul23_11-19-29_MasanoMacBook-Air-2283.local/events.out.tfevents.1721701169.MasanoMacBook-Air-2283.local +3 -0
runs/Jul23_11-22-11_MasanoMacBook-Air-2283.local/events.out.tfevents.1721701331.MasanoMacBook-Air-2283.local +3 -0
runs/Jul23_12-27-17_MasanoMacBook-Air-2283.local/events.out.tfevents.1721705237.MasanoMacBook-Air-2283.local +3 -0
trainer_state.json +196 -0
training_args.bin +3 -0

README.md CHANGED Viewed

@@ -1,3 +1,57 @@
----
-license: apache-2.0
----

+---
+license: apache-2.0
+library_name: peft
+tags:
+- generated_from_trainer
+base_model: HuggingFaceTB/SmolLM-135M-Instruct
+model-index:
+- name: smollm-ft-onlyqa-adapter
+  results: []
+---
+<!-- This model card has been generated automatically according to the information the Trainer had access to. You
+should probably proofread and complete it, then remove this comment. -->
+# smollm-ft-onlyqa-adapter
+This model is a fine-tuned version of [HuggingFaceTB/SmolLM-135M-Instruct](https://huggingface.co/HuggingFaceTB/SmolLM-135M-Instruct) on an unknown dataset.
+## Model description
+More information needed
+## Intended uses & limitations
+More information needed
+## Training and evaluation data
+More information needed
+## Training procedure
+### Training hyperparameters
+The following hyperparameters were used during training:
+- learning_rate: 0.001
+- train_batch_size: 1
+- eval_batch_size: 8
+- seed: 42
+- gradient_accumulation_steps: 4
+- total_train_batch_size: 4
+- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+- lr_scheduler_type: linear
+- lr_scheduler_warmup_steps: 5
+- num_epochs: 4
+### Training results
+### Framework versions
+- PEFT 0.11.1
+- Transformers 4.41.1
+- Pytorch 2.3.0
+- Datasets 2.20.0
+- Tokenizers 0.19.1

adapter_config.json ADDED Viewed

	@@ -0,0 +1,34 @@

+{
+  "alpha_pattern": {},
+  "auto_mapping": null,
+  "base_model_name_or_path": "HuggingFaceTB/SmolLM-135M-Instruct",
+  "bias": "none",
+  "fan_in_fan_out": false,
+  "inference_mode": true,
+  "init_lora_weights": true,
+  "layer_replication": null,
+  "layers_pattern": null,
+  "layers_to_transform": null,
+  "loftq_config": {},
+  "lora_alpha": 32,
+  "lora_dropout": 0.05,
+  "megatron_config": null,
+  "megatron_core": "megatron.core",
+  "modules_to_save": null,
+  "peft_type": "LORA",
+  "r": 32,
+  "rank_pattern": {},
+  "revision": null,
+  "target_modules": [
+    "self_attn.v_proj",
+    "mlp.down_proj",
+    "self_attn.q_proj",
+    "self_attn.o_proj",
+    "mlp.gate_proj",
+    "mlp.up_proj",
+    "self_attn.k_proj"
+  ],
+  "task_type": "CAUSAL_LM",
+  "use_dora": false,
+  "use_rslora": false
+}

adapter_model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3c58005299e0c2425c71624cbd289f112945877da3d126ab2d0e20008ed7a365
+size 39131224

runs/Jul23_11-19-29_MasanoMacBook-Air-2283.local/events.out.tfevents.1721701169.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3f0d310d004ec74be56767c18cff03121e9d8185443dcf71805b3052429cdd72
+size 4883

runs/Jul23_11-22-11_MasanoMacBook-Air-2283.local/events.out.tfevents.1721701331.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:9bc0a7d852940d40707beed7169610a90b74eb2f5308ffd219f6102e6fb6b8ad
+size 5503

runs/Jul23_12-27-17_MasanoMacBook-Air-2283.local/events.out.tfevents.1721705237.MasanoMacBook-Air-2283.local ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:f576b296bb26ce22a5ae823a31cac7c98bebaf8eb1d4acb08f0e0a24240bb166
+size 9783

trainer_state.json ADDED Viewed

	@@ -0,0 +1,196 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.9646017699115044,
+  "eval_steps": 500,
+  "global_step": 112,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.17699115044247787,
+      "grad_norm": 0.28110718727111816,
+      "learning_rate": 0.001,
+      "loss": 2.0914,
+      "step": 5
+    },
+    {
+      "epoch": 0.35398230088495575,
+      "grad_norm": 0.4683087170124054,
+      "learning_rate": 0.0009532710280373832,
+      "loss": 68.6106,
+      "step": 10
+    },
+    {
+      "epoch": 0.5309734513274337,
+      "grad_norm": 0.391385018825531,
+      "learning_rate": 0.0009065420560747663,
+      "loss": 6.5177369285288705e+28,
+      "step": 15
+    },
+    {
+      "epoch": 0.7079646017699115,
+      "grad_norm": 0.28258973360061646,
+      "learning_rate": 0.0008598130841121495,
+      "loss": 1.3377,
+      "step": 20
+    },
+    {
+      "epoch": 0.8849557522123894,
+      "grad_norm": 0.19995203614234924,
+      "learning_rate": 0.0008130841121495327,
+      "loss": 1.3511,
+      "step": 25
+    },
+    {
+      "epoch": 1.0619469026548674,
+      "grad_norm": 0.19121702015399933,
+      "learning_rate": 0.0007663551401869159,
+      "loss": 1.2251,
+      "step": 30
+    },
+    {
+      "epoch": 1.238938053097345,
+      "grad_norm": 0.24388529360294342,
+      "learning_rate": 0.0007196261682242991,
+      "loss": 140.8433,
+      "step": 35
+    },
+    {
+      "epoch": 1.415929203539823,
+      "grad_norm": 0.2640773355960846,
+      "learning_rate": 0.0006728971962616822,
+      "loss": 105.5469,
+      "step": 40
+    },
+    {
+      "epoch": 1.592920353982301,
+      "grad_norm": 0.21957242488861084,
+      "learning_rate": 0.0006261682242990654,
+      "loss": 1.1374,
+      "step": 45
+    },
+    {
+      "epoch": 1.7699115044247788,
+      "grad_norm": 0.2884615957736969,
+      "learning_rate": 0.0005794392523364486,
+      "loss": 4.329361321642949e+28,
+      "step": 50
+    },
+    {
+      "epoch": 1.9469026548672566,
+      "grad_norm": 0.3385254442691803,
+      "learning_rate": 0.0005327102803738317,
+      "loss": 1.0632,
+      "step": 55
+    },
+    {
+      "epoch": 2.1238938053097347,
+      "grad_norm": 0.2307429313659668,
+      "learning_rate": 0.00048598130841121494,
+      "loss": 0.8567,
+      "step": 60
+    },
+    {
+      "epoch": 2.3008849557522124,
+      "grad_norm": 0.22807413339614868,
+      "learning_rate": 0.00043925233644859815,
+      "loss": 0.8818,
+      "step": 65
+    },
+    {
+      "epoch": 2.47787610619469,
+      "grad_norm": 0.3359227478504181,
+      "learning_rate": 0.0003925233644859813,
+      "loss": 1.4346974010152121e+29,
+      "step": 70
+    },
+    {
+      "epoch": 2.6548672566371683,
+      "grad_norm": 0.35234493017196655,
+      "learning_rate": 0.00034579439252336447,
+      "loss": 105.9515,
+      "step": 75
+    },
+    {
+      "epoch": 2.831858407079646,
+      "grad_norm": 0.2722542881965637,
+      "learning_rate": 0.00029906542056074763,
+      "loss": 1.0193,
+      "step": 80
+    },
+    {
+      "epoch": 3.0088495575221237,
+      "grad_norm": 0.2476160079240799,
+      "learning_rate": 0.00025233644859813084,
+      "loss": 0.9268,
+      "step": 85
+    },
+    {
+      "epoch": 3.185840707964602,
+      "grad_norm": 0.2469383180141449,
+      "learning_rate": 0.00020560747663551403,
+      "loss": 0.8098,
+      "step": 90
+    },
+    {
+      "epoch": 3.3628318584070795,
+      "grad_norm": 0.2519451975822449,
+      "learning_rate": 0.0001588785046728972,
+      "loss": 0.8464,
+      "step": 95
+    },
+    {
+      "epoch": 3.5398230088495577,
+      "grad_norm": 0.29985764622688293,
+      "learning_rate": 0.00011214953271028037,
+      "loss": 1.3981207458798617e+29,
+      "step": 100
+    },
+    {
+      "epoch": 3.7168141592920354,
+      "grad_norm": 1.0310410261154175,
+      "learning_rate": 6.542056074766355e-05,
+      "loss": 6.232082018900711e+27,
+      "step": 105
+    },
+    {
+      "epoch": 3.893805309734513,
+      "grad_norm": 0.25868210196495056,
+      "learning_rate": 1.8691588785046727e-05,
+      "loss": 0.8616,
+      "step": 110
+    },
+    {
+      "epoch": 3.9646017699115044,
+      "step": 112,
+      "total_flos": 127914559565184.0,
+      "train_loss": 1.7767182107594924e+28,
+      "train_runtime": 1986.7669,
+      "train_samples_per_second": 0.228,
+      "train_steps_per_second": 0.056
+    }
+  ],
+  "logging_steps": 5,
+  "max_steps": 112,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 4,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": false,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 127914559565184.0,
+  "train_batch_size": 1,
+  "trial_name": null,
+  "trial_params": null
+}

training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:6416ae0d260f12f2a4806e523aae6855d34cd9ef9b9aa1f96399c9c082a0a11e
+size 5112