Doowon96 commited on
Commit
4a95a6c
1 Parent(s): e72e661

Training in progress, step 500

Browse files
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4d89ad8b9454f1d0b670fc7e447f6d0a2f1c8aa327f29fe93915db5e9510ab
3
  size 442518124
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4da76d70a524eacec579338be444279371ca1212e89b1c3ff12452d480ce07
3
  size 442518124
run-1/checkpoint-500/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd4d89ad8b9454f1d0b670fc7e447f6d0a2f1c8aa327f29fe93915db5e9510ab
3
  size 442518124
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7a4da76d70a524eacec579338be444279371ca1212e89b1c3ff12452d480ce07
3
  size 442518124
run-1/checkpoint-500/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7bfeb4cd61b7f89aef6c894a76e5cf6da2fa925d17a0256c9941bb027f4e1b22
3
  size 885156090
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fcd679f38dd251c90164cd0590ce269851ff9023cbffab70a9465bfde2fc08c5
3
  size 885156090
run-1/checkpoint-500/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8676fbd04f9752ca208326869aadf87a8e201d4c9c4328ccab03b20bbb933463
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f1bda03e9ae8d21d65e8de146ec1797a54afdd7e04e8929ddf985180fe3791c
3
+ size 14244
run-1/checkpoint-500/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b084160dc6107c5c64602fdf374da79d74a6e7e085133c00104fa7a15de63149
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e9793b73f5790244a8460edf3f1bad9bcbee7514c9949ab36a641baa1c5d874a
3
  size 1064
run-1/checkpoint-500/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.8162388724528657,
3
  "best_model_checkpoint": "test-klue/ynat/run-1/checkpoint-500",
4
- "epoch": 0.43782837127845886,
5
  "eval_steps": 50,
6
  "global_step": 500,
7
  "is_hyper_param_search": true,
@@ -9,168 +9,168 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.04,
13
- "learning_rate": 4.707876059953676e-06,
14
- "loss": 1.9595,
15
  "step": 50
16
  },
17
  {
18
- "epoch": 0.04,
19
- "eval_f1": 0.01705140497086806,
20
- "eval_loss": 1.9648141860961914,
21
- "eval_runtime": 13.2976,
22
- "eval_samples_per_second": 684.86,
23
- "eval_steps_per_second": 1.354,
24
  "step": 50
25
  },
26
  {
27
- "epoch": 0.09,
28
- "learning_rate": 9.415752119907352e-06,
29
- "loss": 1.9099,
30
  "step": 100
31
  },
32
  {
33
- "epoch": 0.09,
34
- "eval_f1": 0.07360850081330118,
35
- "eval_loss": 2.032604217529297,
36
- "eval_runtime": 13.3151,
37
- "eval_samples_per_second": 683.961,
38
- "eval_steps_per_second": 1.352,
39
  "step": 100
40
  },
41
  {
42
- "epoch": 0.13,
43
- "learning_rate": 1.4123628179861027e-05,
44
- "loss": 1.6735,
45
  "step": 150
46
  },
47
  {
48
- "epoch": 0.13,
49
- "eval_f1": 0.38393105234961034,
50
- "eval_loss": 1.5790455341339111,
51
- "eval_runtime": 12.7623,
52
- "eval_samples_per_second": 713.584,
53
- "eval_steps_per_second": 1.41,
54
  "step": 150
55
  },
56
  {
57
- "epoch": 0.18,
58
- "learning_rate": 1.8831504239814704e-05,
59
- "loss": 0.9703,
60
  "step": 200
61
  },
62
  {
63
- "epoch": 0.18,
64
- "eval_f1": 0.7721326246350599,
65
- "eval_loss": 0.9168348908424377,
66
- "eval_runtime": 12.5558,
67
- "eval_samples_per_second": 725.324,
68
- "eval_steps_per_second": 1.434,
69
  "step": 200
70
  },
71
  {
72
- "epoch": 0.22,
73
- "learning_rate": 1.866061944634996e-05,
74
- "loss": 0.7516,
75
  "step": 250
76
  },
77
  {
78
- "epoch": 0.22,
79
- "eval_f1": 0.7687755468039507,
80
- "eval_loss": 0.7031986713409424,
81
- "eval_runtime": 12.6864,
82
- "eval_samples_per_second": 717.854,
83
- "eval_steps_per_second": 1.419,
84
  "step": 250
85
  },
86
  {
87
- "epoch": 0.26,
88
- "learning_rate": 1.8489734652885216e-05,
89
- "loss": 0.6465,
90
  "step": 300
91
  },
92
  {
93
- "epoch": 0.26,
94
- "eval_f1": 0.8144320202800481,
95
- "eval_loss": 0.6549465656280518,
96
- "eval_runtime": 12.8685,
97
- "eval_samples_per_second": 707.697,
98
- "eval_steps_per_second": 1.399,
99
  "step": 300
100
  },
101
  {
102
- "epoch": 0.31,
103
- "learning_rate": 1.8318849859420474e-05,
104
- "loss": 0.6678,
105
  "step": 350
106
  },
107
  {
108
- "epoch": 0.31,
109
- "eval_f1": 0.793206481293857,
110
- "eval_loss": 0.7163683176040649,
111
- "eval_runtime": 12.9464,
112
- "eval_samples_per_second": 703.437,
113
- "eval_steps_per_second": 1.39,
114
  "step": 350
115
  },
116
  {
117
- "epoch": 0.35,
118
- "learning_rate": 1.8147965065955732e-05,
119
- "loss": 0.7099,
120
  "step": 400
121
  },
122
  {
123
- "epoch": 0.35,
124
- "eval_f1": 0.8066754005498158,
125
- "eval_loss": 0.7112386226654053,
126
- "eval_runtime": 12.8448,
127
- "eval_samples_per_second": 709.006,
128
- "eval_steps_per_second": 1.401,
129
  "step": 400
130
  },
131
  {
132
- "epoch": 0.39,
133
- "learning_rate": 1.7977080272490987e-05,
134
- "loss": 0.5154,
135
  "step": 450
136
  },
137
  {
138
- "epoch": 0.39,
139
- "eval_f1": 0.8128681854085146,
140
- "eval_loss": 0.6707363724708557,
141
- "eval_runtime": 12.7035,
142
- "eval_samples_per_second": 716.892,
143
- "eval_steps_per_second": 1.417,
144
  "step": 450
145
  },
146
  {
147
- "epoch": 0.44,
148
- "learning_rate": 1.7806195479026245e-05,
149
- "loss": 0.6595,
150
  "step": 500
151
  },
152
  {
153
- "epoch": 0.44,
154
- "eval_f1": 0.8162388724528657,
155
- "eval_loss": 0.6982755064964294,
156
- "eval_runtime": 12.7345,
157
- "eval_samples_per_second": 715.146,
158
- "eval_steps_per_second": 1.413,
159
  "step": 500
160
  }
161
  ],
162
  "logging_steps": 50,
163
- "max_steps": 5710,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 5,
166
  "save_steps": 500,
167
- "total_flos": 19068185282400.0,
168
- "train_batch_size": 4,
169
  "trial_name": null,
170
  "trial_params": {
171
- "learning_rate": 1.8831504239814704e-05,
172
  "num_train_epochs": 5,
173
- "per_device_train_batch_size": 4,
174
- "seed": 32
175
  }
176
  }
 
1
  {
2
+ "best_metric": 0.8284272507637513,
3
  "best_model_checkpoint": "test-klue/ynat/run-1/checkpoint-500",
4
+ "epoch": 3.4965034965034967,
5
  "eval_steps": 50,
6
  "global_step": 500,
7
  "is_hyper_param_search": true,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.35,
13
+ "learning_rate": 5.859444109925402e-05,
14
+ "loss": 1.497,
15
  "step": 50
16
  },
17
  {
18
+ "epoch": 0.35,
19
+ "eval_f1": 0.6734208042191661,
20
+ "eval_loss": 1.089546799659729,
21
+ "eval_runtime": 13.0037,
22
+ "eval_samples_per_second": 700.339,
23
+ "eval_steps_per_second": 1.384,
24
  "step": 50
25
  },
26
  {
27
+ "epoch": 0.7,
28
+ "learning_rate": 5.418884402412214e-05,
29
+ "loss": 0.5665,
30
  "step": 100
31
  },
32
  {
33
+ "epoch": 0.7,
34
+ "eval_f1": 0.8012954608718272,
35
+ "eval_loss": 0.6758251190185547,
36
+ "eval_runtime": 13.0152,
37
+ "eval_samples_per_second": 699.72,
38
+ "eval_steps_per_second": 1.383,
39
  "step": 100
40
  },
41
  {
42
+ "epoch": 1.05,
43
+ "learning_rate": 4.9783246948990256e-05,
44
+ "loss": 0.5083,
45
  "step": 150
46
  },
47
  {
48
+ "epoch": 1.05,
49
+ "eval_f1": 0.7895493614371508,
50
+ "eval_loss": 0.6960522532463074,
51
+ "eval_runtime": 12.9373,
52
+ "eval_samples_per_second": 703.936,
53
+ "eval_steps_per_second": 1.391,
54
  "step": 150
55
  },
56
  {
57
+ "epoch": 1.4,
58
+ "learning_rate": 4.537764987385838e-05,
59
+ "loss": 0.3865,
60
  "step": 200
61
  },
62
  {
63
+ "epoch": 1.4,
64
+ "eval_f1": 0.8201909522421974,
65
+ "eval_loss": 0.6086650490760803,
66
+ "eval_runtime": 12.7238,
67
+ "eval_samples_per_second": 715.746,
68
+ "eval_steps_per_second": 1.415,
69
  "step": 200
70
  },
71
  {
72
+ "epoch": 1.75,
73
+ "learning_rate": 4.09720527987265e-05,
74
+ "loss": 0.385,
75
  "step": 250
76
  },
77
  {
78
+ "epoch": 1.75,
79
+ "eval_f1": 0.8359538639496097,
80
+ "eval_loss": 0.50257807970047,
81
+ "eval_runtime": 12.8233,
82
+ "eval_samples_per_second": 710.19,
83
+ "eval_steps_per_second": 1.404,
84
  "step": 250
85
  },
86
  {
87
+ "epoch": 2.1,
88
+ "learning_rate": 3.656645572359462e-05,
89
+ "loss": 0.357,
90
  "step": 300
91
  },
92
  {
93
+ "epoch": 2.1,
94
+ "eval_f1": 0.8294020659039051,
95
+ "eval_loss": 0.5500179529190063,
96
+ "eval_runtime": 12.8836,
97
+ "eval_samples_per_second": 706.867,
98
+ "eval_steps_per_second": 1.397,
99
  "step": 300
100
  },
101
  {
102
+ "epoch": 2.45,
103
+ "learning_rate": 3.216085864846273e-05,
104
+ "loss": 0.2337,
105
  "step": 350
106
  },
107
  {
108
+ "epoch": 2.45,
109
+ "eval_f1": 0.8324393645838855,
110
+ "eval_loss": 0.5851877927780151,
111
+ "eval_runtime": 12.9246,
112
+ "eval_samples_per_second": 704.625,
113
+ "eval_steps_per_second": 1.393,
114
  "step": 350
115
  },
116
  {
117
+ "epoch": 2.8,
118
+ "learning_rate": 2.775526157333085e-05,
119
+ "loss": 0.2136,
120
  "step": 400
121
  },
122
  {
123
+ "epoch": 2.8,
124
+ "eval_f1": 0.8449098012505971,
125
+ "eval_loss": 0.5285059809684753,
126
+ "eval_runtime": 12.7922,
127
+ "eval_samples_per_second": 711.919,
128
+ "eval_steps_per_second": 1.407,
129
  "step": 400
130
  },
131
  {
132
+ "epoch": 3.15,
133
+ "learning_rate": 2.3349664498198968e-05,
134
+ "loss": 0.1926,
135
  "step": 450
136
  },
137
  {
138
+ "epoch": 3.15,
139
+ "eval_f1": 0.8313597336118761,
140
+ "eval_loss": 0.6143582463264465,
141
+ "eval_runtime": 12.8169,
142
+ "eval_samples_per_second": 710.548,
143
+ "eval_steps_per_second": 1.404,
144
  "step": 450
145
  },
146
  {
147
+ "epoch": 3.5,
148
+ "learning_rate": 1.894406742306709e-05,
149
+ "loss": 0.1359,
150
  "step": 500
151
  },
152
  {
153
+ "epoch": 3.5,
154
+ "eval_f1": 0.8284272507637513,
155
+ "eval_loss": 0.7059531211853027,
156
+ "eval_runtime": 12.7876,
157
+ "eval_samples_per_second": 712.173,
158
+ "eval_steps_per_second": 1.408,
159
  "step": 500
160
  }
161
  ],
162
  "logging_steps": 50,
163
+ "max_steps": 715,
164
  "num_input_tokens_seen": 0,
165
  "num_train_epochs": 5,
166
  "save_steps": 500,
167
+ "total_flos": 178689205892400.0,
168
+ "train_batch_size": 32,
169
  "trial_name": null,
170
  "trial_params": {
171
+ "learning_rate": 5.859444109925402e-05,
172
  "num_train_epochs": 5,
173
+ "per_device_train_batch_size": 32,
174
+ "seed": 11
175
  }
176
  }
run-1/checkpoint-500/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0aff63987d55bdb29a46347243764a71254aa8765001a1e61e633d8ed4d1fb8a
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf57395bda7f210838beb0e7bc53917d611090e8578e2da74f8aa9db6d5cf24
3
  size 4728
training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0aff63987d55bdb29a46347243764a71254aa8765001a1e61e633d8ed4d1fb8a
3
  size 4728
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3bf57395bda7f210838beb0e7bc53917d611090e8578e2da74f8aa9db6d5cf24
3
  size 4728