sara-nabhani commited on
Commit
2564339
·
1 Parent(s): 3304798

Training in progress, step 1000

Browse files
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e94aa42a134922511d37b0f7b8a3683d5fd7d1d79d9781b3117b09ca7273b936
3
  size 2843370360
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4b9f72aa7b44e7a9ec47f89b7774912f0581af69d82b5930eed4b6eb9f9c9365
3
  size 2843370360
last-checkpoint/pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4df99eed370c9cb39b1ec5ed20aac79dc213c5aade1e76816b891752f51827c
3
  size 1421660981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176173d359247b923d0648004a1d4c4e72a08cb2741671127858595923d9c5d0
3
  size 1421660981
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:19f993d576aad282362d3ca490d6b3442b5e729ea10296bb1d19b837eadc6c8f
3
- size 14575
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c61056c1b86433bea751294e0b85aebf024f3ac7d72392effbce9faca4759620
3
+ size 14639
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8636707e906db799535a0e24d8b11f4682008d2f9dce23ea4add566cf191f0eb
3
  size 627
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:68a6f395aca12d068a355806ad25a39cc1cf9ac8d7df16949ceeaa9f78f9ba22
3
  size 627
last-checkpoint/trainer_state.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
- "best_metric": 0.13967554851569386,
3
- "best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-default/checkpoint-200",
4
- "epoch": 1.183431952662722,
5
- "global_step": 200,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
@@ -43,11 +43,159 @@
43
  "eval_samples_per_second": 422.907,
44
  "eval_steps_per_second": 13.383,
45
  "step": 200
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
46
  }
47
  ],
48
  "max_steps": 1690,
49
  "num_train_epochs": 10,
50
- "total_flos": 1089865359765408.0,
51
  "trial_name": null,
52
  "trial_params": null
53
  }
 
1
  {
2
+ "best_metric": 0.3712866943650337,
3
+ "best_model_checkpoint": "/home2/s5432073/language-tech-project/results/ltp-roberta-large-default/checkpoint-1000",
4
+ "epoch": 5.9171597633136095,
5
+ "global_step": 1000,
6
  "is_hyper_param_search": false,
7
  "is_local_process_zero": true,
8
  "is_world_process_zero": true,
 
43
  "eval_samples_per_second": 422.907,
44
  "eval_steps_per_second": 13.383,
45
  "step": 200
46
+ },
47
+ {
48
+ "epoch": 2.37,
49
+ "learning_rate": 8.037383177570094e-06,
50
+ "loss": 0.3287,
51
+ "step": 400
52
+ },
53
+ {
54
+ "epoch": 2.37,
55
+ "eval_f1": 0.26059605664667,
56
+ "eval_f1_all": [
57
+ 0.4311377245508982,
58
+ 0.33333333333333337,
59
+ 0.0,
60
+ 0.0,
61
+ 0.6143001007049347,
62
+ 0.03592814371257485,
63
+ 0.2823529411764706,
64
+ 0.0,
65
+ 0.7357440890125174,
66
+ 0.55,
67
+ 0.1650485436893204,
68
+ 0.3709677419354839,
69
+ 0.0,
70
+ 0.0,
71
+ 0.11209439528023599,
72
+ 0.0,
73
+ 0.6120218579234973,
74
+ 0.5846153846153846,
75
+ 0.034934497816593885,
76
+ 0.3494423791821561
77
+ ],
78
+ "eval_loss": 0.32891085743904114,
79
+ "eval_runtime": 3.0437,
80
+ "eval_samples_per_second": 622.918,
81
+ "eval_steps_per_second": 19.713,
82
+ "step": 400
83
+ },
84
+ {
85
+ "epoch": 3.55,
86
+ "learning_rate": 6.791277258566978e-06,
87
+ "loss": 0.2952,
88
+ "step": 600
89
+ },
90
+ {
91
+ "epoch": 3.55,
92
+ "eval_f1": 0.32608326603750615,
93
+ "eval_f1_all": [
94
+ 0.5166240409207161,
95
+ 0.5852478839177752,
96
+ 0.0,
97
+ 0.0,
98
+ 0.6099585062240663,
99
+ 0.0588235294117647,
100
+ 0.29069767441860467,
101
+ 0.0,
102
+ 0.745417515274949,
103
+ 0.619047619047619,
104
+ 0.3628691983122363,
105
+ 0.4564564564564564,
106
+ 0.0,
107
+ 0.015625,
108
+ 0.49217935349322217,
109
+ 0.04332129963898917,
110
+ 0.6925515055467513,
111
+ 0.6268656716417911,
112
+ 0.05714285714285715,
113
+ 0.3488372093023256
114
+ ],
115
+ "eval_loss": 0.31139805912971497,
116
+ "eval_runtime": 2.9889,
117
+ "eval_samples_per_second": 634.353,
118
+ "eval_steps_per_second": 20.074,
119
+ "step": 600
120
+ },
121
+ {
122
+ "epoch": 4.73,
123
+ "learning_rate": 5.545171339563863e-06,
124
+ "loss": 0.2745,
125
+ "step": 800
126
+ },
127
+ {
128
+ "epoch": 4.73,
129
+ "eval_f1": 0.35306069701657317,
130
+ "eval_f1_all": [
131
+ 0.5316455696202532,
132
+ 0.575,
133
+ 0.02857142857142857,
134
+ 0.125,
135
+ 0.6256306760847629,
136
+ 0.1005586592178771,
137
+ 0.32222222222222224,
138
+ 0.0,
139
+ 0.7624595469255664,
140
+ 0.6393442622950821,
141
+ 0.4034334763948497,
142
+ 0.5371900826446281,
143
+ 0.0,
144
+ 0.015503875968992248,
145
+ 0.5170630816959669,
146
+ 0.08304498269896193,
147
+ 0.6463620981387478,
148
+ 0.6822429906542056,
149
+ 0.07407407407407407,
150
+ 0.39186691312384475
151
+ ],
152
+ "eval_loss": 0.3067249059677124,
153
+ "eval_runtime": 2.9637,
154
+ "eval_samples_per_second": 639.733,
155
+ "eval_steps_per_second": 20.245,
156
+ "step": 800
157
+ },
158
+ {
159
+ "epoch": 5.92,
160
+ "learning_rate": 4.299065420560748e-06,
161
+ "loss": 0.2575,
162
+ "step": 1000
163
+ },
164
+ {
165
+ "epoch": 5.92,
166
+ "eval_f1": 0.3712866943650337,
167
+ "eval_f1_all": [
168
+ 0.5213032581453635,
169
+ 0.5556978233034571,
170
+ 0.1081081081081081,
171
+ 0.1391304347826087,
172
+ 0.6363636363636365,
173
+ 0.1005586592178771,
174
+ 0.2840909090909091,
175
+ 0.0,
176
+ 0.7576530612244897,
177
+ 0.6426076833527357,
178
+ 0.4453441295546559,
179
+ 0.5411140583554377,
180
+ 0.0,
181
+ 0.015037593984962405,
182
+ 0.5839929639401935,
183
+ 0.1921921921921922,
184
+ 0.641573994867408,
185
+ 0.6936936936936936,
186
+ 0.12648221343873517,
187
+ 0.4407894736842105
188
+ ],
189
+ "eval_loss": 0.3082274794578552,
190
+ "eval_runtime": 2.9599,
191
+ "eval_samples_per_second": 640.56,
192
+ "eval_steps_per_second": 20.271,
193
+ "step": 1000
194
  }
195
  ],
196
  "max_steps": 1690,
197
  "num_train_epochs": 10,
198
+ "total_flos": 5502978001057296.0,
199
  "trial_name": null,
200
  "trial_params": null
201
  }
pytorch_model.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a4df99eed370c9cb39b1ec5ed20aac79dc213c5aade1e76816b891752f51827c
3
  size 1421660981
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:176173d359247b923d0648004a1d4c4e72a08cb2741671127858595923d9c5d0
3
  size 1421660981