ysn-rfd committed on
Commit
ecb5917
1 Parent(s): 5500979

Upload 16 files

Browse files
results/checkpoint-261/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sshleifer/tiny-gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 2,
16
+ "n_head": 2,
17
+ "n_inner": null,
18
+ "n_layer": 2,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.44.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
results/checkpoint-261/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.44.0"
6
+ }
results/checkpoint-261/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:640601ba0b3c1e4fd7165066bc40d9f8763ab0bf0f6100dd1774347029fa9d6e
3
+ size 413296
results/checkpoint-261/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f681331a803a387caafb9c32ec9a0db1b3488ad53149947c96b53d572342b75e
3
+ size 843914
results/checkpoint-261/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f479ad266ec74a3abb9fbbca69099f40c96f82543253302c1a6863078f93d5bf
3
+ size 13990
results/checkpoint-261/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3c35e5bd9ecf468d46bff56ff4d224e988cf593ec3cefa0ab68a1aa4aa0dca58
3
+ size 1064
results/checkpoint-261/trainer_state.json ADDED
@@ -0,0 +1,32 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 3.0,
5
+ "eval_steps": 500,
6
+ "global_step": 261,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [],
11
+ "logging_steps": 500,
12
+ "max_steps": 261,
13
+ "num_input_tokens_seen": 0,
14
+ "num_train_epochs": 3,
15
+ "save_steps": 10000,
16
+ "stateful_callbacks": {
17
+ "TrainerControl": {
18
+ "args": {
19
+ "should_epoch_stop": false,
20
+ "should_evaluate": false,
21
+ "should_log": false,
22
+ "should_save": true,
23
+ "should_training_stop": true
24
+ },
25
+ "attributes": {}
26
+ }
27
+ },
28
+ "total_flos": 121171968.0,
29
+ "train_batch_size": 4,
30
+ "trial_name": null,
31
+ "trial_params": null
32
+ }
results/checkpoint-261/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c5a9801ba1718affc6aa74261559536f39e94ea7a2abc8fc5a228f92471ec808
3
+ size 5112
results/checkpoint-780/config.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "sshleifer/tiny-gpt2",
3
+ "activation_function": "gelu_new",
4
+ "architectures": [
5
+ "GPT2LMHeadModel"
6
+ ],
7
+ "attn_pdrop": 0.1,
8
+ "bos_token_id": 50256,
9
+ "embd_pdrop": 0.1,
10
+ "eos_token_id": 50256,
11
+ "initializer_range": 0.02,
12
+ "layer_norm_epsilon": 1e-05,
13
+ "model_type": "gpt2",
14
+ "n_ctx": 1024,
15
+ "n_embd": 2,
16
+ "n_head": 2,
17
+ "n_inner": null,
18
+ "n_layer": 2,
19
+ "n_positions": 1024,
20
+ "reorder_and_upcast_attn": false,
21
+ "resid_pdrop": 0.1,
22
+ "scale_attn_by_inverse_layer_idx": false,
23
+ "scale_attn_weights": true,
24
+ "summary_activation": null,
25
+ "summary_first_dropout": 0.1,
26
+ "summary_proj_to_labels": true,
27
+ "summary_type": "cls_index",
28
+ "summary_use_proj": true,
29
+ "task_specific_params": {
30
+ "text-generation": {
31
+ "do_sample": true,
32
+ "max_length": 50
33
+ }
34
+ },
35
+ "torch_dtype": "float32",
36
+ "transformers_version": "4.44.0",
37
+ "use_cache": true,
38
+ "vocab_size": 50257
39
+ }
results/checkpoint-780/generation_config.json ADDED
@@ -0,0 +1,6 @@
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 50256,
4
+ "eos_token_id": 50256,
5
+ "transformers_version": "4.44.0"
6
+ }
results/checkpoint-780/model.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5433a97ae6e3065d18a7dfccc21a2a57756708fde717b57b5d411fcb02570b3c
3
+ size 413296
results/checkpoint-780/optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ea04690b7cf7aac0cbcd904574d8af9ed2d8c6f5b1cf3b0ab0f7b3e5a9a99f6f
3
+ size 843914
results/checkpoint-780/rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:df5fc66fffd1f067af01dd93904daffa117b42757d39da87dd8616556f5d8ae8
3
+ size 13990
results/checkpoint-780/scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04724f42bf4812cc2716a9b914df39dc353c86cdbfee5c8198ced0bfcbe76d6c
3
+ size 1064
results/checkpoint-780/trainer_state.json ADDED
@@ -0,0 +1,154 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 5.0,
5
+ "eval_steps": 100,
6
+ "global_step": 780,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.6410256410256411,
13
+ "grad_norm": 0.5242204666137695,
14
+ "learning_rate": 0.002,
15
+ "loss": 10.7409,
16
+ "step": 100
17
+ },
18
+ {
19
+ "epoch": 0.6410256410256411,
20
+ "eval_loss": NaN,
21
+ "eval_perplexity": 39350.50390625,
22
+ "eval_runtime": 6.6038,
23
+ "eval_samples_per_second": 5.3,
24
+ "eval_steps_per_second": 2.726,
25
+ "step": 100
26
+ },
27
+ {
28
+ "epoch": 1.282051282051282,
29
+ "grad_norm": 1.9119541645050049,
30
+ "learning_rate": 0.004,
31
+ "loss": 9.6783,
32
+ "step": 200
33
+ },
34
+ {
35
+ "epoch": 1.282051282051282,
36
+ "eval_loss": NaN,
37
+ "eval_perplexity": 5642.44091796875,
38
+ "eval_runtime": 6.5477,
39
+ "eval_samples_per_second": 5.345,
40
+ "eval_steps_per_second": 2.749,
41
+ "step": 200
42
+ },
43
+ {
44
+ "epoch": 1.9230769230769231,
45
+ "grad_norm": 1.3237804174423218,
46
+ "learning_rate": 0.006,
47
+ "loss": 6.8212,
48
+ "step": 300
49
+ },
50
+ {
51
+ "epoch": 1.9230769230769231,
52
+ "eval_loss": NaN,
53
+ "eval_perplexity": 653.0867309570312,
54
+ "eval_runtime": 6.3668,
55
+ "eval_samples_per_second": 5.497,
56
+ "eval_steps_per_second": 2.827,
57
+ "step": 300
58
+ },
59
+ {
60
+ "epoch": 2.564102564102564,
61
+ "grad_norm": 1.503178596496582,
62
+ "learning_rate": 0.008,
63
+ "loss": 5.5177,
64
+ "step": 400
65
+ },
66
+ {
67
+ "epoch": 2.564102564102564,
68
+ "eval_loss": NaN,
69
+ "eval_perplexity": 1196.52783203125,
70
+ "eval_runtime": 6.6136,
71
+ "eval_samples_per_second": 5.292,
72
+ "eval_steps_per_second": 2.722,
73
+ "step": 400
74
+ },
75
+ {
76
+ "epoch": 3.2051282051282053,
77
+ "grad_norm": 3.9715425968170166,
78
+ "learning_rate": 0.01,
79
+ "loss": 5.4345,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 3.2051282051282053,
84
+ "eval_loss": NaN,
85
+ "eval_perplexity": 655.4906005859375,
86
+ "eval_runtime": 6.6124,
87
+ "eval_samples_per_second": 5.293,
88
+ "eval_steps_per_second": 2.722,
89
+ "step": 500
90
+ },
91
+ {
92
+ "epoch": 3.8461538461538463,
93
+ "grad_norm": 1.5786511898040771,
94
+ "learning_rate": 0.006428571428571429,
95
+ "loss": 5.3271,
96
+ "step": 600
97
+ },
98
+ {
99
+ "epoch": 3.8461538461538463,
100
+ "eval_loss": NaN,
101
+ "eval_perplexity": 631.6055297851562,
102
+ "eval_runtime": 6.3755,
103
+ "eval_samples_per_second": 5.49,
104
+ "eval_steps_per_second": 2.823,
105
+ "step": 600
106
+ },
107
+ {
108
+ "epoch": 4.487179487179487,
109
+ "grad_norm": 0.9853035807609558,
110
+ "learning_rate": 0.002857142857142857,
111
+ "loss": 5.2154,
112
+ "step": 700
113
+ },
114
+ {
115
+ "epoch": 4.487179487179487,
116
+ "eval_loss": NaN,
117
+ "eval_perplexity": 642.812255859375,
118
+ "eval_runtime": 6.8174,
119
+ "eval_samples_per_second": 5.134,
120
+ "eval_steps_per_second": 2.64,
121
+ "step": 700
122
+ }
123
+ ],
124
+ "logging_steps": 100,
125
+ "max_steps": 780,
126
+ "num_input_tokens_seen": 0,
127
+ "num_train_epochs": 5,
128
+ "save_steps": 2000,
129
+ "stateful_callbacks": {
130
+ "EarlyStoppingCallback": {
131
+ "args": {
132
+ "early_stopping_patience": 3,
133
+ "early_stopping_threshold": 0.01
134
+ },
135
+ "attributes": {
136
+ "early_stopping_patience_counter": 0
137
+ }
138
+ },
139
+ "TrainerControl": {
140
+ "args": {
141
+ "should_epoch_stop": false,
142
+ "should_evaluate": false,
143
+ "should_log": false,
144
+ "should_save": true,
145
+ "should_training_stop": true
146
+ },
147
+ "attributes": {}
148
+ }
149
+ },
150
+ "total_flos": 181524480.0,
151
+ "train_batch_size": 2,
152
+ "trial_name": null,
153
+ "trial_params": null
154
+ }
results/checkpoint-780/training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc26b2a2a9d0c96596a22a65fedd593e95159c88a5f6f2d7c6f5f9c400c7679d
3
+ size 5112