howard committed
Commit afd27cc
1 Parent(s): 713e8b8
README.md CHANGED
@@ -93,7 +93,7 @@ special_tokens:
 
  This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.7998
+ - Loss: 0.8081
 
  ## Model description
 
@@ -117,9 +117,9 @@ The following hyperparameters were used during training:
  - eval_batch_size: 4
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 6
- - total_train_batch_size: 24
- - total_eval_batch_size: 24
+ - num_devices: 2
+ - total_train_batch_size: 8
+ - total_eval_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
@@ -129,10 +129,10 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 1.3083 | 0.0011 | 1 | 1.0894 |
- | 0.9442 | 0.2505 | 228 | 0.8245 |
- | 0.9134 | 0.5011 | 456 | 0.8050 |
- | 0.8844 | 0.7516 | 684 | 0.7998 |
+ | 1.2732 | 0.0004 | 1 | 1.0894 |
+ | 0.8739 | 0.2503 | 683 | 0.8078 |
+ | 0.8836 | 0.5005 | 1366 | 0.8037 |
+ | 0.7952 | 0.7508 | 2049 | 0.8081 |
 
 
  ### Framework versions
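The hyperparameter change above is internally consistent: cutting num_devices from 6 to 2 shrinks the effective batch size from 24 to 8, so one epoch now takes roughly three times as many optimizer steps and the evaluation rows move from steps 228/456/684 to 683/1366/2049. A minimal sanity-check sketch, assuming per_device_train_batch_size=4 and gradient_accumulation_steps=1 (neither value is shown in this hunk):

```python
# Rough bookkeeping behind the batch-size and step changes in this commit.
# per_device_train_batch_size=4 and gradient_accumulation_steps=1 are
# assumptions, not values taken from this hunk of the model card.

def total_batch_size(per_device: int, num_devices: int, grad_accum: int = 1) -> int:
    """Effective batch size as reported in HF Trainer-style model cards."""
    return per_device * num_devices * grad_accum

assert total_batch_size(4, 6) == 24  # before this commit: 6 GPUs
assert total_batch_size(4, 2) == 8   # after this commit: 2 GPUs

# A 3x smaller effective batch means ~3x more steps per epoch, which matches
# the last eval row moving from step 684 to step 2049 (the small offset comes
# from epoch-fraction rounding).
print(684 * 24 / 8)  # 2052.0, close to the recorded 2049
```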
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "k_proj",
+ "up_proj",
  "q_proj",
- "o_proj",
  "v_proj",
- "k_proj",
+ "o_proj",
  "down_proj",
- "up_proj",
  "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a9eff459dd65bb31260ed39b43f3210bfc6c070c1cbbab8505fd7e270c4f1bf
+ oid sha256:4d22d1c0dd64b6925d3e6056dcbe13ebcacae210086e33aea8586785ceea96c4
  size 84047370
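adapter_model.bin and the checkpoint files below are stored as Git LFS pointers: the repository tracks only a version line, a sha256 oid, and the byte size, while the actual blob lives in LFS storage. A minimal sketch of checking a downloaded file against its pointer (the local path is an assumption):

```python
# Verify a downloaded LFS object against the oid recorded in its pointer file.
# "adapter_model.bin" is assumed to be the locally downloaded blob.
import hashlib

EXPECTED_OID = "4d22d1c0dd64b6925d3e6056dcbe13ebcacae210086e33aea8586785ceea96c4"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("adapter_model.bin") == EXPECTED_OID, "checksum mismatch"
```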
{checkpoint-910 → checkpoint-2729}/README.md RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/adapter_config.json RENAMED
@@ -20,12 +20,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "k_proj",
+ "up_proj",
  "q_proj",
- "o_proj",
  "v_proj",
- "k_proj",
+ "o_proj",
  "down_proj",
- "up_proj",
  "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
{checkpoint-910 → checkpoint-2729}/adapter_model.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1fd6b96d72797d7ab00a14c721a1caa958807cb354aaf5fc1de1c7c8396ae765
+ oid sha256:85d0a72feed7dd29d25cc60d757d2208a0256907f1e908781e3e3c7c068229f2
  size 83945296
{checkpoint-910 → checkpoint-2729}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7356033d918e8cade4c5d4707ccb3221c9f972d8ea9ed5471eb01198390ade02
+ oid sha256:dc446715b6df84ca30726c60c826cf728dcd447bc7c5f1cb3529f6d8815aecdb
  size 168037626
{checkpoint-910 → checkpoint-2729}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b734ddd8b545089542d0069f0d3e242822791297232fd4cfc0fcd8dab3fac26
- size 15472
+ oid sha256:64d3290a1aebcab0f5f968ac4d92a5dc623bcd379fc9829f7d4373f5272a1d11
+ size 14512
{checkpoint-910 → checkpoint-2729}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:65b7b2a05b67feff206f7f1c4e1fbb925596072a3ed04e463547880239e9305f
- size 15472
+ oid sha256:9df9380368f9b6ecc293abe5159e36567ae9306cac1425c03c3efb4ebca2358e
+ size 14512
{checkpoint-910 → checkpoint-2729}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f27b676446b750ed756a49515dae83a30a332fca7d86ab020a4138eb5693b4e8
+ oid sha256:74339daba51f99da3291ac96e4330be9316bf512e9d279b30e983780bd6a60e5
  size 1064
{checkpoint-910 → checkpoint-2729}/special_tokens_map.json RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/tokenizer.json RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/tokenizer_config.json RENAMED
File without changes
checkpoint-2729/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
{checkpoint-910 → checkpoint-2729}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2864251821ba31b5ec7e24cfbdaabb814a58336d03e65f91d390075f679cf7b3
+ oid sha256:1fa20c05d8309ba2c9ec7b0442b3c3133c0181fb585461f1ff3f0b80d81d7c4c
  size 6200
checkpoint-910/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:5c55498625b2fe17dee3579380e9b74598e2bcb457befffdeb01340b1aad9ff5
- size 15472
checkpoint-910/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8a429a274989863af451f371fbf62d966ed83036d2a7058f3a63a9ad06155719
- size 15472
checkpoint-910/rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:46785cf59ef0cb42fbd4d9ad8fd481c74c0dc95fd6f2c90b0f1bcdf24cb438cd
- size 15472
checkpoint-910/rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:50b45630936093e329c0379be0ced2124c7db87eb74fc33a0637c75e956f0646
- size 15472
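The four deleted files follow from the num_devices change in README.md: the trainer checkpoints one RNG state file per training process, so a 2-GPU run only writes rng_state_0.pth and rng_state_1.pth, and the files for ranks 2–5 from the earlier 6-GPU run disappear. A small sketch of the expected file set (the naming pattern is taken from the filenames in this diff):

```python
# One RNG state file is saved per training process (rank), so the set of
# rng_state_*.pth files in a checkpoint tracks num_devices.
def expected_rng_states(num_devices: int) -> list[str]:
    return [f"rng_state_{rank}.pth" for rank in range(num_devices)]

print(expected_rng_states(6))  # old 6-GPU run: rng_state_0.pth ... rng_state_5.pth
print(expected_rng_states(2))  # new 2-GPU run: rng_state_0.pth, rng_state_1.pth
```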
checkpoint-910/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff