nblinh committed · verified
Commit 29751ba · 1 Parent(s): 8cd6bd5

End of training

README.md CHANGED

@@ -78,7 +78,7 @@ resume_from_checkpoint: null
  s2_attention: null
  sample_packing: false
  saves_per_epoch: 1
- sequence_len: 1024
+ sequence_len: 2048
  strict: false
  tf32: false
  tokenizer_type: AutoTokenizer
@@ -103,7 +103,7 @@ xformers_attention: true

  This model is a fine-tuned version of [Qwen/Qwen1.5-0.5B-Chat](https://huggingface.co/Qwen/Qwen1.5-0.5B-Chat) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.6344
+ - Loss: 0.3129

  ## Model description

@@ -135,7 +135,7 @@ The following hyperparameters were used during training:

  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 0.5982 | 0.0001 | 10 | 0.6344 |
+ | 0.6412 | 0.0001 | 10 | 0.3129 |


  ### Framework versions
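Taken together, the README hunks record two substantive changes: the axolotl `sequence_len` was doubled from 1024 to 2048, and the reported eval loss dropped from 0.6344 to 0.3129 (measured after only 10 steps at epoch 0.0001). A minimal inference sketch for the resulting adapter, assuming the `transformers` and `peft` libraries; the adapter repo id below is a hypothetical placeholder for this repository:

```python
from transformers import AutoModelForCausalLM, AutoTokenizer
from peft import PeftModel

BASE = "Qwen/Qwen1.5-0.5B-Chat"     # base model named in the README
ADAPTER = "nblinh/end-of-training"  # hypothetical id; substitute this repo's actual path

tokenizer = AutoTokenizer.from_pretrained(BASE)
model = AutoModelForCausalLM.from_pretrained(BASE)
# Attach the LoRA adapter whose weights this commit updates.
model = PeftModel.from_pretrained(model, ADAPTER)

# sequence_len is 2048 after this commit, so cap prompts accordingly.
inputs = tokenizer("Hello!", return_tensors="pt", truncation=True, max_length=2048)
output = model.generate(**inputs, max_new_tokens=32)
print(tokenizer.decode(output[0], skip_special_tokens=True))
```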
adapter_config.json CHANGED

@@ -20,13 +20,13 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
- "q_proj",
+ "gate_proj",
+ "k_proj",
  "o_proj",
- "up_proj",
  "v_proj",
- "k_proj",
+ "q_proj",
  "down_proj",
- "gate_proj"
+ "up_proj"
  ],
  "task_type": "CAUSAL_LM",
  "use_dora": false,
adapter_model.bin CHANGED

@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:0fbcb485708cf73bd3e8c2b73acc10f131747be9e1a8af3c42a342619eb9f871
+ oid sha256:90c22ccae38b8f2676cbe082cb2ff6b0ed7cf5001e4a4a9320a7698f41b3b763
  size 30398410
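One note on the adapter_config.json hunk above: both sides list the same seven projection modules, so the change is purely a re-serialization order, not a change in which layers the LoRA adapter touches. A minimal sketch of an equivalent PEFT configuration, assuming the `peft` library; `r` and `lora_alpha` are illustrative placeholders since the diff does not show them:

```python
from peft import LoraConfig

# Mirrors target_modules in adapter_config.json after this commit.
# r and lora_alpha are placeholders; this diff does not include them.
config = LoraConfig(
    r=16,
    lora_alpha=32,
    target_modules=[
        "gate_proj", "k_proj", "o_proj", "v_proj",
        "q_proj", "down_proj", "up_proj",
    ],
    task_type="CAUSAL_LM",  # as in the config
    use_dora=False,         # as in the config
)
```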
adapter_model.safetensors CHANGED

@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:cc4ec68431f55df9b6dc0e1694da8d6d7390eca202d7e0dba6ef380aa6f2589a
+ oid sha256:3fb949e5a5abcac622e882d40aef4a8fedf6e36fa56178b25f714c34872949ee
  size 30322120
training_args.bin CHANGED

@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1de78312fc7b8bf2bd311bd2109386cd6196800e3c13b07c9dce2a163690857b
+ oid sha256:c6a31c5aa7aace24fede26d1803f3475f502732524a502a70e1277a113c8ffc3
  size 6776
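The three binary entries above are Git LFS pointer files: in each one only the sha256 `oid` changes while `size` is unchanged, i.e., the commit swaps in new weights of identical byte length. A minimal sketch of checking a locally fetched file against its pointer, using only the standard library:

```python
import hashlib

def sha256_of(path: str) -> str:
    """Hash the file in 1 MiB chunks so large weight files stay out of memory."""
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest()

# oid from the adapter_model.safetensors pointer after this commit
expected = "3fb949e5a5abcac622e882d40aef4a8fedf6e36fa56178b25f714c34872949ee"
assert sha256_of("adapter_model.safetensors") == expected, "checksum mismatch"
```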