Nikita Pavlichenko
committed on
Commit
·
9be7cca
1
Parent(s):
77dd825
Remove [BOS] from descriptions
Browse files
- README.md +12 -12
- all_results.json +10 -10
- eval_results.json +6 -6
- pytorch_model.bin +1 -1
- train_results.json +4 -4
- trainer_state.json +0 -0
- training_args.bin +1 -1
README.md
CHANGED
@@ -16,8 +16,8 @@ should probably proofread and complete it, then remove this comment. -->
|
|
16 |
|
17 |
This model is a fine-tuned version of [gpt2-large](https://huggingface.co/gpt2-large) on the None dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
-
- Loss: 2.
|
20 |
-
- Accuracy: 0.
|
21 |
|
22 |
## Model description
|
23 |
|
@@ -49,16 +49,16 @@ The following hyperparameters were used during training:
|
|
49 |
|
50 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
51 |
|:-------------:|:-----:|:-----:|:---------------:|:--------:|
|
52 |
-
| 2.
|
53 |
-
| 2.
|
54 |
-
| 2.
|
55 |
-
| 2.
|
56 |
-
| 2.
|
57 |
-
| 1.
|
58 |
-
| 1.
|
59 |
-
| 1.
|
60 |
-
| 2.
|
61 |
-
| 1.
|
62 |
|
63 |
|
64 |
### Framework versions
|
|
|
16 |
|
17 |
This model is a fine-tuned version of [gpt2-large](https://huggingface.co/gpt2-large) on the None dataset.
|
18 |
It achieves the following results on the evaluation set:
|
19 |
+
- Loss: 2.0808
|
20 |
+
- Accuracy: 0.8556
|
21 |
|
22 |
## Model description
|
23 |
|
|
|
49 |
|
50 |
| Training Loss | Epoch | Step | Validation Loss | Accuracy |
|
51 |
|:-------------:|:-----:|:-----:|:---------------:|:--------:|
|
52 |
+
| 2.4827 | 0.19 | 1000 | 2.4565 | 0.8520 |
|
53 |
+
| 2.6468 | 0.37 | 2000 | 2.3303 | 0.8530 |
|
54 |
+
| 2.5106 | 0.56 | 3000 | 2.2487 | 0.8537 |
|
55 |
+
| 2.0732 | 0.74 | 4000 | 2.2020 | 0.8541 |
|
56 |
+
| 2.159 | 0.93 | 5000 | 2.1594 | 0.8545 |
|
57 |
+
| 1.856 | 1.12 | 6000 | 2.1518 | 0.8548 |
|
58 |
+
| 1.9138 | 1.3 | 7000 | 2.1261 | 0.8551 |
|
59 |
+
| 1.8055 | 1.49 | 8000 | 2.1126 | 0.8552 |
|
60 |
+
| 2.0385 | 1.67 | 9000 | 2.1008 | 0.8554 |
|
61 |
+
| 1.9648 | 1.86 | 10000 | 2.0858 | 0.8555 |
|
62 |
|
63 |
|
64 |
### Framework versions
|
all_results.json
CHANGED
@@ -1,15 +1,15 @@
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_loss": 2.
|
5 |
-
"eval_runtime":
|
6 |
"eval_samples": 10750,
|
7 |
-
"eval_samples_per_second":
|
8 |
-
"eval_steps_per_second": 10.
|
9 |
-
"perplexity":
|
10 |
-
"train_loss": 2.
|
11 |
-
"train_runtime":
|
12 |
"train_samples": 43003,
|
13 |
-
"train_samples_per_second":
|
14 |
-
"train_steps_per_second": 2.
|
15 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
+
"eval_accuracy": 0.8555773331979283,
|
4 |
+
"eval_loss": 2.0807912349700928,
|
5 |
+
"eval_runtime": 125.0813,
|
6 |
"eval_samples": 10750,
|
7 |
+
"eval_samples_per_second": 85.944,
|
8 |
+
"eval_steps_per_second": 10.745,
|
9 |
+
"perplexity": 8.010804836289337,
|
10 |
+
"train_loss": 2.1236886782571673,
|
11 |
+
"train_runtime": 4504.4872,
|
12 |
"train_samples": 43003,
|
13 |
+
"train_samples_per_second": 19.093,
|
14 |
+
"train_steps_per_second": 2.387
|
15 |
}
|
eval_results.json
CHANGED
@@ -1,10 +1,10 @@
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
-
"eval_accuracy": 0.
|
4 |
-
"eval_loss": 2.
|
5 |
-
"eval_runtime":
|
6 |
"eval_samples": 10750,
|
7 |
-
"eval_samples_per_second":
|
8 |
-
"eval_steps_per_second": 10.
|
9 |
-
"perplexity":
|
10 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
+
"eval_accuracy": 0.8555773331979283,
|
4 |
+
"eval_loss": 2.0807912349700928,
|
5 |
+
"eval_runtime": 125.0813,
|
6 |
"eval_samples": 10750,
|
7 |
+
"eval_samples_per_second": 85.944,
|
8 |
+
"eval_steps_per_second": 10.745,
|
9 |
+
"perplexity": 8.010804836289337
|
10 |
}
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3134045245
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5a854f911fcf67887abe92522ba2b32cb2eb42925c185792b4a089aaae7a38ae
|
3 |
size 3134045245
|
train_results.json
CHANGED
@@ -1,8 +1,8 @@
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
-
"train_loss": 2.
|
4 |
-
"train_runtime":
|
5 |
"train_samples": 43003,
|
6 |
-
"train_samples_per_second":
|
7 |
-
"train_steps_per_second": 2.
|
8 |
}
|
|
|
1 |
{
|
2 |
"epoch": 2.0,
|
3 |
+
"train_loss": 2.1236886782571673,
|
4 |
+
"train_runtime": 4504.4872,
|
5 |
"train_samples": 43003,
|
6 |
+
"train_samples_per_second": 19.093,
|
7 |
+
"train_steps_per_second": 2.387
|
8 |
}
|
trainer_state.json
CHANGED
The diff for this file is too large to render.
See raw diff
|
|
training_args.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 3451
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:0a4f0f455cfe353b38c6958e9dd682a9cb66e8e0afbc23961e3ce8be69bb0ab7
|
3 |
size 3451
|