sunsetsobserver
committed on
Commit
•
54340e4
1
Parent(s):
56d97b0
Upload 9 files
Browse files- README.md +161 -0
- all_results.json +7 -0
- config.json +26 -0
- generation_config.json +7 -0
- model.safetensors +3 -0
- tokenizer.json +0 -0
- train_results.json +7 -0
- trainer_state.json +0 -0
- training_args.bin +3 -0
README.md
ADDED
@@ -0,0 +1,161 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
---
|
2 |
+
tags:
|
3 |
+
- generated_from_trainer
|
4 |
+
metrics:
|
5 |
+
- accuracy
|
6 |
+
model-index:
|
7 |
+
- name: runs
|
8 |
+
results: []
|
9 |
+
---
|
10 |
+
|
11 |
+
<!-- This model card has been generated automatically according to the information the Trainer had access to. You
|
12 |
+
should probably proofread and complete it, then remove this comment. -->
|
13 |
+
|
14 |
+
# runs
|
15 |
+
|
16 |
+
This model uses the `MistralForCausalLM` architecture; the base checkpoint (if any) and the training dataset were not recorded by the Trainer.
|
17 |
+
It achieves the following results on the evaluation set:
|
18 |
+
- Loss: 24.0950
|
19 |
+
- Accuracy: 0.0013
|
20 |
+
|
21 |
+
## Model description
|
22 |
+
|
23 |
+
More information needed
|
24 |
+
|
25 |
+
## Intended uses & limitations
|
26 |
+
|
27 |
+
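More information needed. Note that the training results below show validation loss reaching its minimum (4.7416) at step 12000 and then rising steadily to 24.0950 at step 100000 while training loss falls to 0.0092, with evaluation accuracy staying near 0.0013; the final checkpoint is therefore heavily overfit to the training data.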
|
28 |
+
|
29 |
+
## Training and evaluation data
|
30 |
+
|
31 |
+
More information needed
|
32 |
+
|
33 |
+
## Training procedure
|
34 |
+
|
35 |
+
### Training hyperparameters
|
36 |
+
|
37 |
+
The following hyperparameters were used during training:
|
38 |
+
- learning_rate: 0.0001
|
39 |
+
- train_batch_size: 16
|
40 |
+
- eval_batch_size: 48
|
41 |
+
- seed: 444
|
42 |
+
- gradient_accumulation_steps: 3
|
43 |
+
- total_train_batch_size: 48
|
44 |
+
- optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
|
45 |
+
- lr_scheduler_type: cosine_with_restarts
|
46 |
+
- lr_scheduler_warmup_ratio: 0.3
|
47 |
+
- training_steps: 100000
|
48 |
+
- mixed_precision_training: Native AMP
|
49 |
+
|
50 |
+
### Training results
|
51 |
+
|
52 |
+
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
53 |
+
|:-------------:|:------:|:------:|:---------------:|:--------:|
|
54 |
+
| 8.2359 | 6.04 | 1000 | 8.2170 | 0.0070 |
|
55 |
+
| 7.7137 | 12.07 | 2000 | 7.7007 | 0.0064 |
|
56 |
+
| 6.5277 | 18.11 | 3000 | 6.5254 | 0.0000 |
|
57 |
+
| 6.0375 | 24.14 | 4000 | 6.0532 | 0.0000 |
|
58 |
+
| 5.6908 | 30.18 | 5000 | 5.7100 | 0.0001 |
|
59 |
+
| 5.4294 | 36.22 | 6000 | 5.4758 | 0.0002 |
|
60 |
+
| 5.2161 | 42.25 | 7000 | 5.2891 | 0.0006 |
|
61 |
+
| 5.0151 | 48.29 | 8000 | 5.1152 | 0.0021 |
|
62 |
+
| 4.8349 | 54.33 | 9000 | 4.9847 | 0.0020 |
|
63 |
+
| 4.6358 | 60.36 | 10000 | 4.8754 | 0.0022 |
|
64 |
+
| 4.4326 | 66.4 | 11000 | 4.7809 | 0.0021 |
|
65 |
+
| 4.2632 | 72.43 | 12000 | 4.7416 | 0.0017 |
|
66 |
+
| 4.0415 | 78.47 | 13000 | 4.7503 | 0.0016 |
|
67 |
+
| 3.8196 | 84.51 | 14000 | 4.8472 | 0.0014 |
|
68 |
+
| 3.6207 | 90.54 | 15000 | 5.0215 | 0.0014 |
|
69 |
+
| 3.3163 | 96.58 | 16000 | 5.2939 | 0.0014 |
|
70 |
+
| 3.0377 | 102.62 | 17000 | 5.6685 | 0.0014 |
|
71 |
+
| 2.7272 | 108.65 | 18000 | 6.1649 | 0.0013 |
|
72 |
+
| 2.4319 | 114.69 | 19000 | 6.7556 | 0.0013 |
|
73 |
+
| 2.1647 | 120.72 | 20000 | 7.3951 | 0.0013 |
|
74 |
+
| 1.9001 | 126.76 | 21000 | 8.0823 | 0.0013 |
|
75 |
+
| 1.6708 | 132.8 | 22000 | 8.8230 | 0.0013 |
|
76 |
+
| 1.4762 | 138.83 | 23000 | 9.5335 | 0.0013 |
|
77 |
+
| 1.2833 | 144.87 | 24000 | 10.1973 | 0.0013 |
|
78 |
+
| 1.1451 | 150.91 | 25000 | 10.8213 | 0.0013 |
|
79 |
+
| 1.0251 | 156.94 | 26000 | 11.4402 | 0.0013 |
|
80 |
+
| 0.9164 | 162.98 | 27000 | 11.9995 | 0.0013 |
|
81 |
+
| 0.8174 | 169.01 | 28000 | 12.5680 | 0.0013 |
|
82 |
+
| 0.6862 | 175.05 | 29000 | 13.0050 | 0.0013 |
|
83 |
+
| 0.5738 | 181.09 | 30000 | 13.4692 | 0.0013 |
|
84 |
+
| 0.4524 | 187.12 | 31000 | 13.9220 | 0.0013 |
|
85 |
+
| 0.4252 | 193.16 | 32000 | 14.3340 | 0.0013 |
|
86 |
+
| 0.3952 | 199.2 | 33000 | 14.7961 | 0.0013 |
|
87 |
+
| 0.3684 | 205.23 | 34000 | 15.2421 | 0.0013 |
|
88 |
+
| 0.3338 | 211.27 | 35000 | 15.6433 | 0.0013 |
|
89 |
+
| 0.307 | 217.3 | 36000 | 16.0182 | 0.0013 |
|
90 |
+
| 0.2951 | 223.34 | 37000 | 16.3087 | 0.0013 |
|
91 |
+
| 0.28 | 229.38 | 38000 | 16.6556 | 0.0013 |
|
92 |
+
| 0.2688 | 235.41 | 39000 | 16.9303 | 0.0013 |
|
93 |
+
| 0.2582 | 241.45 | 40000 | 17.2209 | 0.0013 |
|
94 |
+
| 0.238 | 247.48 | 41000 | 17.5311 | 0.0013 |
|
95 |
+
| 0.2261 | 253.52 | 42000 | 17.7731 | 0.0013 |
|
96 |
+
| 0.21 | 259.56 | 43000 | 18.0205 | 0.0013 |
|
97 |
+
| 0.2073 | 265.59 | 44000 | 18.2693 | 0.0013 |
|
98 |
+
| 0.1976 | 271.63 | 45000 | 18.4634 | 0.0013 |
|
99 |
+
| 0.1865 | 277.67 | 46000 | 18.7215 | 0.0012 |
|
100 |
+
| 0.1769 | 283.7 | 47000 | 18.9467 | 0.0013 |
|
101 |
+
| 0.1649 | 289.74 | 48000 | 19.1423 | 0.0013 |
|
102 |
+
| 0.1517 | 295.77 | 49000 | 19.3638 | 0.0013 |
|
103 |
+
| 0.1491 | 301.81 | 50000 | 19.5879 | 0.0013 |
|
104 |
+
| 0.1387 | 307.85 | 51000 | 19.7823 | 0.0013 |
|
105 |
+
| 0.1332 | 313.88 | 52000 | 19.9663 | 0.0013 |
|
106 |
+
| 0.1256 | 319.92 | 53000 | 20.1907 | 0.0013 |
|
107 |
+
| 0.1154 | 325.96 | 54000 | 20.3939 | 0.0013 |
|
108 |
+
| 0.1091 | 331.99 | 55000 | 20.5926 | 0.0013 |
|
109 |
+
| 0.0928 | 338.03 | 56000 | 20.8044 | 0.0013 |
|
110 |
+
| 0.0812 | 344.06 | 57000 | 20.9873 | 0.0013 |
|
111 |
+
| 0.0677 | 350.1 | 58000 | 21.1931 | 0.0013 |
|
112 |
+
| 0.0609 | 356.14 | 59000 | 21.3650 | 0.0013 |
|
113 |
+
| 0.058 | 362.17 | 60000 | 21.5868 | 0.0013 |
|
114 |
+
| 0.0532 | 368.21 | 61000 | 21.7740 | 0.0013 |
|
115 |
+
| 0.0481 | 374.25 | 62000 | 21.9339 | 0.0013 |
|
116 |
+
| 0.0358 | 380.28 | 63000 | 22.1660 | 0.0012 |
|
117 |
+
| 0.0117 | 386.32 | 64000 | 22.4226 | 0.0013 |
|
118 |
+
| 0.0768 | 392.35 | 65000 | 22.2193 | 0.0013 |
|
119 |
+
| 0.0339 | 398.39 | 66000 | 22.3833 | 0.0013 |
|
120 |
+
| 0.0191 | 404.43 | 67000 | 22.5927 | 0.0013 |
|
121 |
+
| 0.0493 | 410.46 | 68000 | 22.6069 | 0.0013 |
|
122 |
+
| 0.0115 | 416.5 | 69000 | 22.8652 | 0.0012 |
|
123 |
+
| 0.0111 | 422.54 | 70000 | 22.9982 | 0.0012 |
|
124 |
+
| 0.1182 | 428.57 | 71000 | 22.6628 | 0.0013 |
|
125 |
+
| 0.0118 | 434.61 | 72000 | 22.9036 | 0.0013 |
|
126 |
+
| 0.0111 | 440.64 | 73000 | 23.0692 | 0.0013 |
|
127 |
+
| 0.011 | 446.68 | 74000 | 23.1857 | 0.0013 |
|
128 |
+
| 0.0386 | 452.72 | 75000 | 22.9263 | 0.0013 |
|
129 |
+
| 0.0109 | 458.75 | 76000 | 23.1548 | 0.0013 |
|
130 |
+
| 0.0109 | 464.79 | 77000 | 23.2761 | 0.0012 |
|
131 |
+
| 0.0108 | 470.82 | 78000 | 23.3763 | 0.0013 |
|
132 |
+
| 0.0131 | 476.86 | 79000 | 23.2048 | 0.0013 |
|
133 |
+
| 0.0108 | 482.9 | 80000 | 23.3772 | 0.0013 |
|
134 |
+
| 0.0106 | 488.93 | 81000 | 23.4733 | 0.0013 |
|
135 |
+
| 0.0106 | 494.97 | 82000 | 23.5654 | 0.0013 |
|
136 |
+
| 0.0242 | 501.01 | 83000 | 23.5459 | 0.0013 |
|
137 |
+
| 0.0104 | 507.04 | 84000 | 23.5695 | 0.0013 |
|
138 |
+
| 0.01 | 513.08 | 85000 | 23.6659 | 0.0013 |
|
139 |
+
| 0.0098 | 519.11 | 86000 | 23.7337 | 0.0013 |
|
140 |
+
| 0.0097 | 525.15 | 87000 | 23.7961 | 0.0013 |
|
141 |
+
| 0.0097 | 531.19 | 88000 | 23.8573 | 0.0013 |
|
142 |
+
| 0.0097 | 537.22 | 89000 | 23.9052 | 0.0013 |
|
143 |
+
| 0.0097 | 543.26 | 90000 | 23.9524 | 0.0013 |
|
144 |
+
| 0.0096 | 549.3 | 91000 | 23.9823 | 0.0013 |
|
145 |
+
| 0.0096 | 555.33 | 92000 | 24.0084 | 0.0013 |
|
146 |
+
| 0.0095 | 561.37 | 93000 | 24.0364 | 0.0013 |
|
147 |
+
| 0.0095 | 567.4 | 94000 | 24.0545 | 0.0013 |
|
148 |
+
| 0.0094 | 573.44 | 95000 | 24.0701 | 0.0013 |
|
149 |
+
| 0.0094 | 579.48 | 96000 | 24.0826 | 0.0013 |
|
150 |
+
| 0.0093 | 585.51 | 97000 | 24.0898 | 0.0013 |
|
151 |
+
| 0.0093 | 591.55 | 98000 | 24.0935 | 0.0013 |
|
152 |
+
| 0.0093 | 597.59 | 99000 | 24.0944 | 0.0013 |
|
153 |
+
| 0.0092 | 603.62 | 100000 | 24.0950 | 0.0013 |
|
154 |
+
|
155 |
+
|
156 |
+
### Framework versions
|
157 |
+
|
158 |
+
- Transformers 4.37.2
|
159 |
+
- Pytorch 2.2.0+cu121
|
160 |
+
- Datasets 2.17.0
|
161 |
+
- Tokenizers 0.15.1
|
all_results.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 603.62,
|
3 |
+
"train_loss": 1.1590602387964726,
|
4 |
+
"train_runtime": 91564.1404,
|
5 |
+
"train_samples_per_second": 52.422,
|
6 |
+
"train_steps_per_second": 1.092
|
7 |
+
}
|
config.json
ADDED
@@ -0,0 +1,26 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"architectures": [
|
3 |
+
"MistralForCausalLM"
|
4 |
+
],
|
5 |
+
"attention_dropout": 0.0,
|
6 |
+
"bos_token_id": 2,
|
7 |
+
"eos_token_id": 3,
|
8 |
+
"hidden_act": "silu",
|
9 |
+
"hidden_size": 512,
|
10 |
+
"initializer_range": 0.02,
|
11 |
+
"intermediate_size": 2048,
|
12 |
+
"max_position_embeddings": 8192,
|
13 |
+
"model_type": "mistral",
|
14 |
+
"num_attention_heads": 8,
|
15 |
+
"num_hidden_layers": 8,
|
16 |
+
"num_key_value_heads": 4,
|
17 |
+
"pad_token_id": 0,
|
18 |
+
"rms_norm_eps": 1e-06,
|
19 |
+
"rope_theta": 10000.0,
|
20 |
+
"sliding_window": 256,
|
21 |
+
"tie_word_embeddings": false,
|
22 |
+
"torch_dtype": "float32",
|
23 |
+
"transformers_version": "4.37.2",
|
24 |
+
"use_cache": true,
|
25 |
+
"vocab_size": 10000
|
26 |
+
}
|
generation_config.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"_from_model_config": true,
|
3 |
+
"bos_token_id": 2,
|
4 |
+
"eos_token_id": 3,
|
5 |
+
"pad_token_id": 0,
|
6 |
+
"transformers_version": "4.37.2"
|
7 |
+
}
|
model.safetensors
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3c1247e57feefd0721e9dc8cef693affcfbc1e2146251ce42407dbcd57b597e1
|
3 |
+
size 166832176
|
tokenizer.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
train_results.json
ADDED
@@ -0,0 +1,7 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
{
|
2 |
+
"epoch": 603.62,
|
3 |
+
"train_loss": 1.1590602387964726,
|
4 |
+
"train_runtime": 91564.1404,
|
5 |
+
"train_samples_per_second": 52.422,
|
6 |
+
"train_steps_per_second": 1.092
|
7 |
+
}
|
trainer_state.json
ADDED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
ADDED
@@ -0,0 +1,3 @@
|
|
|
|
|
|
|
|
|
1 |
+
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:66c4d5509c43eef8667a3ddeca37d26f95e56853bbe64a5e1fe0c02098435fbe
|
3 |
+
size 4664
|