besimray committed
Commit db7abd3 · verified · 1 Parent(s): ff23757

Training in progress, step 140, checkpoint
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f266404b11f932cd639e83d974de617b4287bb271af343f15c1f9f42c44ba741
+ oid sha256:ee338dc939b7c042fee22bc3023129715453e09ab7503d0fbab68f2a9d9e7429
 size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:98b00e17d0227c53fea69ecfa2d1996d9c919078652de27239147f7c82b52cef
+ oid sha256:1685e33900566f466c94654c0999c9110defe07d1d83c288d3920ab9d12b2a82
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:f8a0058f5e4eaf51e57150546d35540d117144afacffb0c679cba8d6cbe11058
+ oid sha256:07dfcab91d136e6f3877a1517e4a4558c6ab33a709be94712f36072861ae0974
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
- oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
+ oid sha256:c049ad9892b8ae242eb26f06a6af3edec6b865f6613ddc97103e21f4231f6420
 size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.027961730957031,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-130",
4
- "epoch": 0.005875573433368738,
5
  "eval_steps": 5,
6
- "global_step": 130,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1133,6 +1133,92 @@
1133
  "eval_samples_per_second": 52.871,
1134
  "eval_steps_per_second": 26.438,
1135
  "step": 130
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1136
  }
1137
  ],
1138
  "logging_steps": 1,
@@ -1161,7 +1247,7 @@
1161
  "attributes": {}
1162
  }
1163
  },
1164
- "total_flos": 1365875097600.0,
1165
  "train_batch_size": 2,
1166
  "trial_name": null,
1167
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.026728630065918,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-140",
4
+ "epoch": 0.006327540620550948,
5
  "eval_steps": 5,
6
+ "global_step": 140,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1133
  "eval_samples_per_second": 52.871,
1134
  "eval_steps_per_second": 26.438,
1135
  "step": 130
1136
+ },
1137
+ {
1138
+ "epoch": 0.005920770152086958,
1139
+ "grad_norm": 0.4459182620048523,
1140
+ "learning_rate": 0.0001713874307839863,
1141
+ "loss": 44.0874,
1142
+ "step": 131
1143
+ },
1144
+ {
1145
+ "epoch": 0.005965966870805179,
1146
+ "grad_norm": 0.5632774233818054,
1147
+ "learning_rate": 0.0001709369921005258,
1148
+ "loss": 44.1085,
1149
+ "step": 132
1150
+ },
1151
+ {
1152
+ "epoch": 0.0060111635895234005,
1153
+ "grad_norm": 0.5518532991409302,
1154
+ "learning_rate": 0.00017048363747516117,
1155
+ "loss": 44.0409,
1156
+ "step": 133
1157
+ },
1158
+ {
1159
+ "epoch": 0.006056360308241622,
1160
+ "grad_norm": 0.5138490200042725,
1161
+ "learning_rate": 0.00017002738554352552,
1162
+ "loss": 44.1078,
1163
+ "step": 134
1164
+ },
1165
+ {
1166
+ "epoch": 0.006101557026959843,
1167
+ "grad_norm": 0.44584622979164124,
1168
+ "learning_rate": 0.00016956825506034867,
1169
+ "loss": 44.1152,
1170
+ "step": 135
1171
+ },
1172
+ {
1173
+ "epoch": 0.006101557026959843,
1174
+ "eval_loss": 11.027368545532227,
1175
+ "eval_runtime": 175.9823,
1176
+ "eval_samples_per_second": 52.943,
1177
+ "eval_steps_per_second": 26.474,
1178
+ "step": 135
1179
+ },
1180
+ {
1181
+ "epoch": 0.006146753745678064,
1182
+ "grad_norm": 0.5159522294998169,
1183
+ "learning_rate": 0.00016910626489868649,
1184
+ "loss": 44.0946,
1185
+ "step": 136
1186
+ },
1187
+ {
1188
+ "epoch": 0.006191950464396285,
1189
+ "grad_norm": 0.4725247323513031,
1190
+ "learning_rate": 0.00016864143404914504,
1191
+ "loss": 44.1131,
1192
+ "step": 137
1193
+ },
1194
+ {
1195
+ "epoch": 0.0062371471831145055,
1196
+ "grad_norm": 0.5374069213867188,
1197
+ "learning_rate": 0.00016817378161909996,
1198
+ "loss": 44.1304,
1199
+ "step": 138
1200
+ },
1201
+ {
1202
+ "epoch": 0.006282343901832727,
1203
+ "grad_norm": 0.44262439012527466,
1204
+ "learning_rate": 0.00016770332683191096,
1205
+ "loss": 44.065,
1206
+ "step": 139
1207
+ },
1208
+ {
1209
+ "epoch": 0.006327540620550948,
1210
+ "grad_norm": 0.5221428871154785,
1211
+ "learning_rate": 0.0001672300890261317,
1212
+ "loss": 44.1053,
1213
+ "step": 140
1214
+ },
1215
+ {
1216
+ "epoch": 0.006327540620550948,
1217
+ "eval_loss": 11.026728630065918,
1218
+ "eval_runtime": 176.1986,
1219
+ "eval_samples_per_second": 52.878,
1220
+ "eval_steps_per_second": 26.442,
1221
+ "step": 140
1222
  }
1223
  ],
1224
  "logging_steps": 1,
 
1247
  "attributes": {}
1248
  }
1249
  },
1250
+ "total_flos": 1470942412800.0,
1251
  "train_batch_size": 2,
1252
  "trial_name": null,
1253
  "trial_params": null
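The trainer_state.json hunks simply append the step 131–140 log entries and bump the summary fields (best_metric, best_model_checkpoint, epoch, global_step, total_flos). A small sketch for reading those fields back from the committed state file, assuming the checkpoint directory is available locally; the per-step records shown in the diff live under the Trainer's standard log_history key:

```python
import json
from pathlib import Path

# Path as committed in this repo; adjust if the checkpoint lives elsewhere.
state = json.loads(Path("last-checkpoint/trainer_state.json").read_text())

print("global_step:          ", state["global_step"])            # 140
print("epoch:                ", state["epoch"])                  # ~0.006328
print("best_metric:          ", state["best_metric"])            # 11.0267...
print("best_model_checkpoint:", state["best_model_checkpoint"])  # miner_id_24/checkpoint-140

# The most recent evaluation record is the step-140 entry shown in the diff.
last_eval = [e for e in state["log_history"] if "eval_loss" in e][-1]
print("latest eval_loss:     ", last_eval["eval_loss"])
```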