howard
committed on
Commit
•
afd27cc
1
Parent(s):
713e8b8
update
Browse files- README.md +8 -8
- adapter_config.json +3 -3
- adapter_model.bin +1 -1
- {checkpoint-910 → checkpoint-2729}/README.md +0 -0
- {checkpoint-910 → checkpoint-2729}/adapter_config.json +3 -3
- {checkpoint-910 → checkpoint-2729}/adapter_model.safetensors +1 -1
- {checkpoint-910 → checkpoint-2729}/optimizer.pt +1 -1
- {checkpoint-910 → checkpoint-2729}/rng_state_0.pth +2 -2
- {checkpoint-910 → checkpoint-2729}/rng_state_1.pth +2 -2
- {checkpoint-910 → checkpoint-2729}/scheduler.pt +1 -1
- {checkpoint-910 → checkpoint-2729}/special_tokens_map.json +0 -0
- {checkpoint-910 → checkpoint-2729}/tokenizer.json +0 -0
- {checkpoint-910 → checkpoint-2729}/tokenizer_config.json +0 -0
- checkpoint-2729/trainer_state.json +0 -0
- {checkpoint-910 → checkpoint-2729}/training_args.bin +1 -1
- checkpoint-910/rng_state_2.pth +0 -3
- checkpoint-910/rng_state_3.pth +0 -3
- checkpoint-910/rng_state_4.pth +0 -3
- checkpoint-910/rng_state_5.pth +0 -3
- checkpoint-910/trainer_state.json +0 -0
README.md
CHANGED
@@ -93,7 +93,7 @@ special_tokens:
|
|
93 |
|
94 |
This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct) on the None dataset.
|
95 |
It achieves the following results on the evaluation set:
|
96 |
-
- Loss: 0.
|
97 |
|
98 |
## Model description
|
99 |
|
@@ -117,9 +117,9 @@ The following hyperparameters were used during training:
|
|
117 |
- eval_batch_size: 4
|
118 |
- seed: 42
|
119 |
- distributed_type: multi-GPU
|
120 |
-
- num_devices:
|
121 |
-
- total_train_batch_size:
|
122 |
-
- total_eval_batch_size:
|
123 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
124 |
- lr_scheduler_type: cosine
|
125 |
- lr_scheduler_warmup_steps: 10
|
@@ -129,10 +129,10 @@ The following hyperparameters were used during training:
|
|
129 |
|
130 |
| Training Loss | Epoch | Step | Validation Loss |
|
131 |
|:-------------:|:------:|:----:|:---------------:|
|
132 |
-
| 1.
|
133 |
-
| 0.
|
134 |
-
| 0.
|
135 |
-
| 0.
|
136 |
|
137 |
|
138 |
### Framework versions
|
|
|
93 |
|
94 |
This model is a fine-tuned version of [unsloth/Meta-Llama-3.1-8B-Instruct](https://huggingface.co/unsloth/Meta-Llama-3.1-8B-Instruct) on the None dataset.
|
95 |
It achieves the following results on the evaluation set:
|
96 |
+
- Loss: 0.8081
|
97 |
|
98 |
## Model description
|
99 |
|
|
|
117 |
- eval_batch_size: 4
|
118 |
- seed: 42
|
119 |
- distributed_type: multi-GPU
|
120 |
+
- num_devices: 2
|
121 |
+
- total_train_batch_size: 8
|
122 |
+
- total_eval_batch_size: 8
|
123 |
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
124 |
- lr_scheduler_type: cosine
|
125 |
- lr_scheduler_warmup_steps: 10
|
|
|
129 |
|
130 |
| Training Loss | Epoch | Step | Validation Loss |
|
131 |
|:-------------:|:------:|:----:|:---------------:|
|
132 |
+
| 1.2732 | 0.0004 | 1 | 1.0894 |
|
133 |
+
| 0.8739 | 0.2503 | 683 | 0.8078 |
|
134 |
+
| 0.8836 | 0.5005 | 1366 | 0.8037 |
|
135 |
+
| 0.7952 | 0.7508 | 2049 | 0.8081 |
|
136 |
|
137 |
|
138 |
### Framework versions
|
adapter_config.json
CHANGED
@@ -20,12 +20,12 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
|
|
23 |
"q_proj",
|
24 |
-
"o_proj",
|
25 |
"v_proj",
|
26 |
-
"
|
27 |
"down_proj",
|
28 |
-
"up_proj",
|
29 |
"gate_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"k_proj",
|
24 |
+
"up_proj",
|
25 |
"q_proj",
|
|
|
26 |
"v_proj",
|
27 |
+
"o_proj",
|
28 |
"down_proj",
|
|
|
29 |
"gate_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
adapter_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 84047370
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4d22d1c0dd64b6925d3e6056dcbe13ebcacae210086e33aea8586785ceea96c4
|
3 |
size 84047370
|
{checkpoint-910 → checkpoint-2729}/README.md
RENAMED
File without changes
|
{checkpoint-910 → checkpoint-2729}/adapter_config.json
RENAMED
@@ -20,12 +20,12 @@
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
|
|
|
|
23 |
"q_proj",
|
24 |
-
"o_proj",
|
25 |
"v_proj",
|
26 |
-
"
|
27 |
"down_proj",
|
28 |
-
"up_proj",
|
29 |
"gate_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
|
|
20 |
"rank_pattern": {},
|
21 |
"revision": null,
|
22 |
"target_modules": [
|
23 |
+
"k_proj",
|
24 |
+
"up_proj",
|
25 |
"q_proj",
|
|
|
26 |
"v_proj",
|
27 |
+
"o_proj",
|
28 |
"down_proj",
|
|
|
29 |
"gate_proj"
|
30 |
],
|
31 |
"task_type": "CAUSAL_LM",
|
{checkpoint-910 → checkpoint-2729}/adapter_model.safetensors
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 83945296
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:85d0a72feed7dd29d25cc60d757d2208a0256907f1e908781e3e3c7c068229f2
|
3 |
size 83945296
|
{checkpoint-910 → checkpoint-2729}/optimizer.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 168037626
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:dc446715b6df84ca30726c60c826cf728dcd447bc7c5f1cb3529f6d8815aecdb
|
3 |
size 168037626
|
{checkpoint-910 → checkpoint-2729}/rng_state_0.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:64d3290a1aebcab0f5f968ac4d92a5dc623bcd379fc9829f7d4373f5272a1d11
|
3 |
+
size 14512
|
{checkpoint-910 → checkpoint-2729}/rng_state_1.pth
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9df9380368f9b6ecc293abe5159e36567ae9306cac1425c03c3efb4ebca2358e
|
3 |
+
size 14512
|
{checkpoint-910 → checkpoint-2729}/scheduler.pt
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:74339daba51f99da3291ac96e4330be9316bf512e9d279b30e983780bd6a60e5
|
3 |
size 1064
|
{checkpoint-910 → checkpoint-2729}/special_tokens_map.json
RENAMED
File without changes
|
{checkpoint-910 → checkpoint-2729}/tokenizer.json
RENAMED
File without changes
|
{checkpoint-910 → checkpoint-2729}/tokenizer_config.json
RENAMED
File without changes
|
checkpoint-2729/trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
{checkpoint-910 → checkpoint-2729}/training_args.bin
RENAMED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 6200
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:1fa20c05d8309ba2c9ec7b0442b3c3133c0181fb585461f1ff3f0b80d81d7c4c
|
3 |
size 6200
|
checkpoint-910/rng_state_2.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:5c55498625b2fe17dee3579380e9b74598e2bcb457befffdeb01340b1aad9ff5
|
3 |
-
size 15472
|
|
|
|
|
|
|
|
checkpoint-910/rng_state_3.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:8a429a274989863af451f371fbf62d966ed83036d2a7058f3a63a9ad06155719
|
3 |
-
size 15472
|
|
|
|
|
|
|
|
checkpoint-910/rng_state_4.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:46785cf59ef0cb42fbd4d9ad8fd481c74c0dc95fd6f2c90b0f1bcdf24cb438cd
|
3 |
-
size 15472
|
|
|
|
|
|
|
|
checkpoint-910/rng_state_5.pth
DELETED
@@ -1,3 +0,0 @@
|
|
1 |
-
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:50b45630936093e329c0379be0ced2124c7db87eb74fc33a0637c75e956f0646
|
3 |
-
size 15472
|
|
|
|
|
|
|
|
checkpoint-910/trainer_state.json
DELETED
The diff for this file is too large to render.
See raw diff
|
|