howard committed
Commit afd27cc
1 Parent(s): 713e8b8
README.md CHANGED
@@ -93,7 +93,7 @@ special_tokens:
 
  This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct) on the None dataset.
  It achieves the following results on the evaluation set:
- - Loss: 0.7998
+ - Loss: 0.8081
 
  ## Model description
 
@@ -117,9 +117,9 @@ The following hyperparameters were used during training:
  - eval_batch_size: 4
  - seed: 42
  - distributed_type: multi-GPU
- - num_devices: 6
- - total_train_batch_size: 24
- - total_eval_batch_size: 24
+ - num_devices: 2
+ - total_train_batch_size: 8
+ - total_eval_batch_size: 8
  - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
  - lr_scheduler_type: cosine
  - lr_scheduler_warmup_steps: 10
@@ -129,10 +129,10 @@ The following hyperparameters were used during training:
 
  | Training Loss | Epoch | Step | Validation Loss |
  |:-------------:|:------:|:----:|:---------------:|
- | 1.3083 | 0.0011 | 1 | 1.0894 |
- | 0.9442 | 0.2505 | 228 | 0.8245 |
- | 0.9134 | 0.5011 | 456 | 0.8050 |
- | 0.8844 | 0.7516 | 684 | 0.7998 |
+ | 1.2732 | 0.0004 | 1 | 1.0894 |
+ | 0.8739 | 0.2503 | 683 | 0.8078 |
+ | 0.8836 | 0.5005 | 1366 | 0.8037 |
+ | 0.7952 | 0.7508 | 2049 | 0.8081 |
 
 
  ### Framework versions
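The hyperparameter change above is internally consistent: cutting num_devices from 6 to 2 shrinks the effective batch size from 24 to 8, so one epoch now takes roughly three times as many optimizer steps and the evaluation rows move from steps 228/456/684 to 683/1366/2049. A minimal sanity-check sketch, assuming per_device_train_batch_size=4 and gradient_accumulation_steps=1 (neither value is shown in this hunk):

```python
# Rough bookkeeping behind the batch-size and step changes in this commit.
# per_device_train_batch_size=4 and gradient_accumulation_steps=1 are
# assumptions, not values taken from this hunk of the model card.

def total_batch_size(per_device: int, num_devices: int, grad_accum: int = 1) -> int:
    """Effective batch size as reported in HF Trainer-style model cards."""
    return per_device * num_devices * grad_accum

assert total_batch_size(4, 6) == 24  # before this commit: 6 GPUs
assert total_batch_size(4, 2) == 8   # after this commit: 2 GPUs

# A 3x smaller effective batch means ~3x more steps per epoch, which matches
# the last eval row moving from step 684 to step 2049 (the small offset comes
# from epoch-fraction rounding).
print(684 * 24 / 8)  # 2052.0, close to the recorded 2049
```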
adapter_config.json CHANGED
@@ -20,12 +20,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "k_proj",
+ "up_proj",
  "q_proj",
- "o_proj",
  "v_proj",
- "k_proj",
+ "o_proj",
  "down_proj",
- "up_proj",
  "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
adapter_model.bin CHANGED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1a9eff459dd65bb31260ed39b43f3210bfc6c070c1cbbab8505fd7e270c4f1bf
+ oid sha256:4d22d1c0dd64b6925d3e6056dcbe13ebcacae210086e33aea8586785ceea96c4
  size 84047370
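adapter_model.bin and the checkpoint files below are stored as Git LFS pointers: the repository tracks only a version line, a sha256 oid, and the byte size, while the actual blob lives in LFS storage. A minimal sketch of checking a downloaded file against its pointer (the local path is an assumption):

```python
# Verify a downloaded LFS object against the oid recorded in its pointer file.
# "adapter_model.bin" is assumed to be the locally downloaded blob.
import hashlib

EXPECTED_OID = "4d22d1c0dd64b6925d3e6056dcbe13ebcacae210086e33aea8586785ceea96c4"

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream the file in chunks so large checkpoints fit in memory."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

assert sha256_of("adapter_model.bin") == EXPECTED_OID, "checksum mismatch"
```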
{checkpoint-910 → checkpoint-2729}/README.md RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/adapter_config.json RENAMED
@@ -20,12 +20,12 @@
  "rank_pattern": {},
  "revision": null,
  "target_modules": [
+ "k_proj",
+ "up_proj",
  "q_proj",
- "o_proj",
  "v_proj",
- "k_proj",
+ "o_proj",
  "down_proj",
- "up_proj",
  "gate_proj"
  ],
  "task_type": "CAUSAL_LM",
{checkpoint-910 → checkpoint-2729}/adapter_model.safetensors RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:1fd6b96d72797d7ab00a14c721a1caa958807cb354aaf5fc1de1c7c8396ae765
+ oid sha256:85d0a72feed7dd29d25cc60d757d2208a0256907f1e908781e3e3c7c068229f2
  size 83945296
{checkpoint-910 → checkpoint-2729}/optimizer.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:7356033d918e8cade4c5d4707ccb3221c9f972d8ea9ed5471eb01198390ade02
+ oid sha256:dc446715b6df84ca30726c60c826cf728dcd447bc7c5f1cb3529f6d8815aecdb
  size 168037626
{checkpoint-910 → checkpoint-2729}/rng_state_0.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:6b734ddd8b545089542d0069f0d3e242822791297232fd4cfc0fcd8dab3fac26
- size 15472
+ oid sha256:64d3290a1aebcab0f5f968ac4d92a5dc623bcd379fc9829f7d4373f5272a1d11
+ size 14512
{checkpoint-910 → checkpoint-2729}/rng_state_1.pth RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:65b7b2a05b67feff206f7f1c4e1fbb925596072a3ed04e463547880239e9305f
- size 15472
+ oid sha256:9df9380368f9b6ecc293abe5159e36567ae9306cac1425c03c3efb4ebca2358e
+ size 14512
{checkpoint-910 → checkpoint-2729}/scheduler.pt RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:f27b676446b750ed756a49515dae83a30a332fca7d86ab020a4138eb5693b4e8
+ oid sha256:74339daba51f99da3291ac96e4330be9316bf512e9d279b30e983780bd6a60e5
  size 1064
{checkpoint-910 → checkpoint-2729}/special_tokens_map.json RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/tokenizer.json RENAMED
File without changes
{checkpoint-910 → checkpoint-2729}/tokenizer_config.json RENAMED
File without changes
checkpoint-2729/trainer_state.json ADDED
The diff for this file is too large to render. See raw diff
 
{checkpoint-910 → checkpoint-2729}/training_args.bin RENAMED
@@ -1,3 +1,3 @@
  version https://git-lfs.github.com/spec/v1
- oid sha256:2864251821ba31b5ec7e24cfbdaabb814a58336d03e65f91d390075f679cf7b3
+ oid sha256:1fa20c05d8309ba2c9ec7b0442b3c3133c0181fb585461f1ff3f0b80d81d7c4c
  size 6200
checkpoint-910/rng_state_2.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:5c55498625b2fe17dee3579380e9b74598e2bcb457befffdeb01340b1aad9ff5
- size 15472
checkpoint-910/rng_state_3.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:8a429a274989863af451f371fbf62d966ed83036d2a7058f3a63a9ad06155719
- size 15472
checkpoint-910/rng_state_4.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:46785cf59ef0cb42fbd4d9ad8fd481c74c0dc95fd6f2c90b0f1bcdf24cb438cd
- size 15472
checkpoint-910/rng_state_5.pth DELETED
@@ -1,3 +0,0 @@
- version https://git-lfs.github.com/spec/v1
- oid sha256:50b45630936093e329c0379be0ced2124c7db87eb74fc33a0637c75e956f0646
- size 15472
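The four deleted files follow from the num_devices change in README.md: the trainer checkpoints one RNG state file per training process, so a 2-GPU run only writes rng_state_0.pth and rng_state_1.pth, and the files for ranks 2–5 from the earlier 6-GPU run disappear. A small sketch of the expected file set (the naming pattern is taken from the filenames in this diff):

```python
# One RNG state file is saved per training process (rank), so the set of
# rng_state_*.pth files in a checkpoint tracks num_devices.
def expected_rng_states(num_devices: int) -> list[str]:
    return [f"rng_state_{rank}.pth" for rank in range(num_devices)]

print(expected_rng_states(6))  # old 6-GPU run: rng_state_0.pth ... rng_state_5.pth
print(expected_rng_states(2))  # new 2-GPU run: rng_state_0.pth, rng_state_1.pth
```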
checkpoint-910/trainer_state.json DELETED
The diff for this file is too large to render. See raw diff