JennnDexter committed on
Commit
8d455d2
1 Parent(s): 487ab19

End of training

Browse files
Files changed (5) hide show
  1. README.md +2 -2
  2. all_results.json +16 -0
  3. eval_results.json +10 -0
  4. train_results.json +9 -0
  5. trainer_state.json +121 -0
README.md CHANGED
@@ -14,7 +14,7 @@ model-index:
14
  name: Causal Language Modeling
15
  type: text-generation
16
  dataset:
17
- name: wikitext
18
  type: wikitext
19
  config: wikitext-2-raw-v1
20
  split: validation
@@ -30,7 +30,7 @@ should probably proofread and complete it, then remove this comment. -->
30
 
31
  # clm
32
 
33
- This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on the wikitext dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 3.4326
36
  - Accuracy: 0.3767
 
14
  name: Causal Language Modeling
15
  type: text-generation
16
  dataset:
17
+ name: wikitext wikitext-2-raw-v1
18
  type: wikitext
19
  config: wikitext-2-raw-v1
20
  split: validation
 
30
 
31
  # clm
32
 
33
+ This model is a fine-tuned version of [distilgpt2](https://huggingface.co/distilgpt2) on the wikitext wikitext-2-raw-v1 dataset.
34
  It achieves the following results on the evaluation set:
35
  - Loss: 3.4326
36
  - Accuracy: 0.3767
all_results.json ADDED
@@ -0,0 +1,16 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.3767106549364614,
4
+ "eval_loss": 3.4325666427612305,
5
+ "eval_runtime": 11.7361,
6
+ "eval_samples": 240,
7
+ "eval_samples_per_second": 20.45,
8
+ "eval_steps_per_second": 2.556,
9
+ "perplexity": 30.955993850684628,
10
+ "total_flos": 605685867872256.0,
11
+ "train_loss": 3.6505329921327787,
12
+ "train_runtime": 308.0636,
13
+ "train_samples": 2318,
14
+ "train_samples_per_second": 7.524,
15
+ "train_steps_per_second": 0.471
16
+ }
eval_results.json ADDED
@@ -0,0 +1,10 @@
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "eval_accuracy": 0.3767106549364614,
4
+ "eval_loss": 3.4325666427612305,
5
+ "eval_runtime": 11.7361,
6
+ "eval_samples": 240,
7
+ "eval_samples_per_second": 20.45,
8
+ "eval_steps_per_second": 2.556,
9
+ "perplexity": 30.955993850684628
10
+ }
train_results.json ADDED
@@ -0,0 +1,9 @@
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 1.0,
3
+ "total_flos": 605685867872256.0,
4
+ "train_loss": 3.6505329921327787,
5
+ "train_runtime": 308.0636,
6
+ "train_samples": 2318,
7
+ "train_samples_per_second": 7.524,
8
+ "train_steps_per_second": 0.471
9
+ }
trainer_state.json ADDED
@@ -0,0 +1,121 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": 0.3767106549364614,
3
+ "best_model_checkpoint": "D:/1_SyscoPY_D/NLP/Data/Transformers_Hug/checkpoint/distilgpt2\\checkpoint-145",
4
+ "epoch": 1.0,
5
+ "eval_steps": 500,
6
+ "global_step": 145,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.07,
13
+ "learning_rate": 1.9999999999999998e-05,
14
+ "loss": 3.9461,
15
+ "step": 10
16
+ },
17
+ {
18
+ "epoch": 0.14,
19
+ "learning_rate": 2.884615384615385e-05,
20
+ "loss": 3.813,
21
+ "step": 20
22
+ },
23
+ {
24
+ "epoch": 0.21,
25
+ "learning_rate": 2.6538461538461538e-05,
26
+ "loss": 3.6952,
27
+ "step": 30
28
+ },
29
+ {
30
+ "epoch": 0.28,
31
+ "learning_rate": 2.423076923076923e-05,
32
+ "loss": 3.6721,
33
+ "step": 40
34
+ },
35
+ {
36
+ "epoch": 0.34,
37
+ "learning_rate": 2.1923076923076924e-05,
38
+ "loss": 3.6339,
39
+ "step": 50
40
+ },
41
+ {
42
+ "epoch": 0.41,
43
+ "learning_rate": 1.9615384615384617e-05,
44
+ "loss": 3.6567,
45
+ "step": 60
46
+ },
47
+ {
48
+ "epoch": 0.48,
49
+ "learning_rate": 1.7307692307692306e-05,
50
+ "loss": 3.6063,
51
+ "step": 70
52
+ },
53
+ {
54
+ "epoch": 0.55,
55
+ "learning_rate": 1.5e-05,
56
+ "loss": 3.6228,
57
+ "step": 80
58
+ },
59
+ {
60
+ "epoch": 0.62,
61
+ "learning_rate": 1.2692307692307693e-05,
62
+ "loss": 3.5844,
63
+ "step": 90
64
+ },
65
+ {
66
+ "epoch": 0.69,
67
+ "learning_rate": 1.0384615384615384e-05,
68
+ "loss": 3.6194,
69
+ "step": 100
70
+ },
71
+ {
72
+ "epoch": 0.76,
73
+ "learning_rate": 8.076923076923077e-06,
74
+ "loss": 3.5918,
75
+ "step": 110
76
+ },
77
+ {
78
+ "epoch": 0.83,
79
+ "learning_rate": 5.76923076923077e-06,
80
+ "loss": 3.5682,
81
+ "step": 120
82
+ },
83
+ {
84
+ "epoch": 0.9,
85
+ "learning_rate": 3.4615384615384617e-06,
86
+ "loss": 3.5482,
87
+ "step": 130
88
+ },
89
+ {
90
+ "epoch": 0.97,
91
+ "learning_rate": 1.153846153846154e-06,
92
+ "loss": 3.585,
93
+ "step": 140
94
+ },
95
+ {
96
+ "epoch": 1.0,
97
+ "eval_accuracy": 0.3767106549364614,
98
+ "eval_loss": 3.4325666427612305,
99
+ "eval_runtime": 12.5136,
100
+ "eval_samples_per_second": 19.179,
101
+ "eval_steps_per_second": 2.397,
102
+ "step": 145
103
+ },
104
+ {
105
+ "epoch": 1.0,
106
+ "step": 145,
107
+ "total_flos": 605685867872256.0,
108
+ "train_loss": 3.6505329921327787,
109
+ "train_runtime": 308.0636,
110
+ "train_samples_per_second": 7.524,
111
+ "train_steps_per_second": 0.471
112
+ }
113
+ ],
114
+ "logging_steps": 10,
115
+ "max_steps": 145,
116
+ "num_train_epochs": 1,
117
+ "save_steps": 100,
118
+ "total_flos": 605685867872256.0,
119
+ "trial_name": null,
120
+ "trial_params": null
121
+ }