Nexspear committed on
Commit 5f405f9 · verified · 1 Parent(s): a02c3e7

Training in progress, step 68, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:2b2cbe0d20bbf52ede83a77655697587a66875a542e98c80071be4452f9155c3
+oid sha256:cdb65e69fc9def1d5f815cf4f2045dce584e95e971111ed6abb4ff874e68a063
 size 161533192
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:334857a3fc8b6972461313eca4c6a9eb44f4c1f4785cad39e8ed65f90d85c31f
+oid sha256:78fb0f45d22d2bc0f6aa461c6301aa75516a6b3e6b8d90e3bb9817d70a08d2d8
 size 82460660
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:3f1ebe3e10acf85c9539c73882ea00608414511c9e9886d3b4b4f0092aeddf55
+oid sha256:a3a7ff2c5a4bc3597693072bb0383a8c84c2ae51df7cb2502cab353824233e74
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:97178a71d5acd54714c38f03fc162b58c9ab83f0e2b9f2d42288a4a7b505c2c6
+oid sha256:57465ffa9dc280f2ea6034fe61064b0208bf36c7b5f569218c0e1296778ee099
 size 1064
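
Each of the four files above is stored as a Git LFS pointer (`version` / `oid sha256:…` / `size`), so only the pointer text changes in the diff while the binary blob is swapped out. As a sanity check, here is a minimal sketch of comparing a locally downloaded blob against the new pointer's `oid`; the local file path is an assumption, and the expected digest is copied from the `adapter_model.safetensors` hunk above.

```python
import hashlib

def sha256_of(path: str, chunk_size: int = 1 << 20) -> str:
    """Stream a file and return its hex SHA-256 digest (the LFS 'oid')."""
    digest = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the updated adapter weights from this commit.
local_path = "last-checkpoint/adapter_model.safetensors"
# New oid taken from the pointer diff above.
expected = "cdb65e69fc9def1d5f815cf4f2045dce584e95e971111ed6abb4ff874e68a063"

print(sha256_of(local_path) == expected)  # True if the blob matches the pointer
```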
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
   "best_metric": null,
   "best_model_checkpoint": null,
-  "epoch": 0.011463250168577209,
+  "epoch": 0.022926500337154418,
   "eval_steps": 34,
-  "global_step": 34,
+  "global_step": 68,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -100,6 +100,91 @@
       "eval_samples_per_second": 14.632,
       "eval_steps_per_second": 1.831,
       "step": 34
+    },
+    {
+      "epoch": 0.012137559002022926,
+      "grad_norm": 1.001940131187439,
+      "learning_rate": 4.9453690018345144e-05,
+      "loss": 0.8057,
+      "step": 36
+    },
+    {
+      "epoch": 0.013149022252191504,
+      "grad_norm": 1.1807010173797607,
+      "learning_rate": 4.932095175695911e-05,
+      "loss": 0.8677,
+      "step": 39
+    },
+    {
+      "epoch": 0.014160485502360081,
+      "grad_norm": 0.7869375348091125,
+      "learning_rate": 4.917401074463441e-05,
+      "loss": 0.6542,
+      "step": 42
+    },
+    {
+      "epoch": 0.015171948752528659,
+      "grad_norm": 0.8986634612083435,
+      "learning_rate": 4.901295279078431e-05,
+      "loss": 0.7597,
+      "step": 45
+    },
+    {
+      "epoch": 0.016183412002697236,
+      "grad_norm": 0.8910415172576904,
+      "learning_rate": 4.883787194871841e-05,
+      "loss": 0.7038,
+      "step": 48
+    },
+    {
+      "epoch": 0.017194875252865813,
+      "grad_norm": 1.069819688796997,
+      "learning_rate": 4.864887046071813e-05,
+      "loss": 0.7414,
+      "step": 51
+    },
+    {
+      "epoch": 0.01820633850303439,
+      "grad_norm": 0.9437199831008911,
+      "learning_rate": 4.8446058698330115e-05,
+      "loss": 0.7289,
+      "step": 54
+    },
+    {
+      "epoch": 0.01921780175320297,
+      "grad_norm": 1.1073840856552124,
+      "learning_rate": 4.822955509791233e-05,
+      "loss": 0.7371,
+      "step": 57
+    },
+    {
+      "epoch": 0.020229265003371546,
+      "grad_norm": 1.1307681798934937,
+      "learning_rate": 4.799948609147061e-05,
+      "loss": 0.6487,
+      "step": 60
+    },
+    {
+      "epoch": 0.02124072825354012,
+      "grad_norm": 0.8803877234458923,
+      "learning_rate": 4.7755986032825864e-05,
+      "loss": 0.6114,
+      "step": 63
+    },
+    {
+      "epoch": 0.022252191503708697,
+      "grad_norm": 0.9320568442344666,
+      "learning_rate": 4.74991971191553e-05,
+      "loss": 0.6164,
+      "step": 66
+    },
+    {
+      "epoch": 0.022926500337154418,
+      "eval_loss": 0.6071863174438477,
+      "eval_runtime": 341.6544,
+      "eval_samples_per_second": 14.62,
+      "eval_steps_per_second": 1.829,
+      "step": 68
     }
   ],
   "logging_steps": 3,
@@ -119,7 +204,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.753463114622566e+16,
+  "total_flos": 9.506926229245133e+16,
   "train_batch_size": 8,
   "trial_name": null,
   "trial_params": null