besimray commited on
Commit
11dc9d8
·
verified ·
1 Parent(s): bb99718

Training in progress, step 250, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0288bde0e0adee15a0cc1db17a7449a8eca313277f1482d1ff0cfd7cab7bec74
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ef0fe17d2ab3da4040a3c7977b8c6f341f08d2b05df764b34ad276ac546fa5f1
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b4f2703fce501b87978e19b3c7bea6d1358a28ff7aa7dadc828ac8570470d59c
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:347185a8a18472d85b591d6c60188bd66fadd6135ef8c34f34376e84d00d9080
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:89ab1cc6b91f586f7f2f0ee2f276603d7276cb565be5c7500e1ed6a5a0584bb2
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:472f6fb72bea35702df9d08ef02dd4970a6ff81a3410e842d1e8a245dcd7f271
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:161da1490be78a40723a580aa2c4f8fdf6c7186d93d25be9a77bba0a93a1a4c9
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8626d0d5c57fc88c3d7f36cad001868cddc459f981786e9f1343c59e21cde7ac
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.020767211914062,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-240",
4
- "epoch": 0.010847212492373054,
5
  "eval_steps": 5,
6
- "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2079,6 +2079,92 @@
2079
  "eval_samples_per_second": 52.828,
2080
  "eval_steps_per_second": 26.417,
2081
  "step": 240
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2082
  }
2083
  ],
2084
  "logging_steps": 1,
@@ -2107,7 +2193,7 @@
2107
  "attributes": {}
2108
  }
2109
  },
2110
- "total_flos": 2521615564800.0,
2111
  "train_batch_size": 2,
2112
  "trial_name": null,
2113
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.020323753356934,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-250",
4
+ "epoch": 0.011299179679555264,
5
  "eval_steps": 5,
6
+ "global_step": 250,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2079
  "eval_samples_per_second": 52.828,
2080
  "eval_steps_per_second": 26.417,
2081
  "step": 240
2082
+ },
2083
+ {
2084
+ "epoch": 0.010892409211091275,
2085
+ "grad_norm": 0.47256338596343994,
2086
+ "learning_rate": 0.00010896393089034336,
2087
+ "loss": 44.0513,
2088
+ "step": 241
2089
+ },
2090
+ {
2091
+ "epoch": 0.010937605929809496,
2092
+ "grad_norm": 0.42103204131126404,
2093
+ "learning_rate": 0.00010832519071782894,
2094
+ "loss": 44.0399,
2095
+ "step": 242
2096
+ },
2097
+ {
2098
+ "epoch": 0.010982802648527717,
2099
+ "grad_norm": 0.49555832147598267,
2100
+ "learning_rate": 0.00010768610832933168,
2101
+ "loss": 44.1504,
2102
+ "step": 243
2103
+ },
2104
+ {
2105
+ "epoch": 0.011027999367245939,
2106
+ "grad_norm": 0.42800289392471313,
2107
+ "learning_rate": 0.0001070467099950254,
2108
+ "loss": 44.0886,
2109
+ "step": 244
2110
+ },
2111
+ {
2112
+ "epoch": 0.01107319608596416,
2113
+ "grad_norm": 0.6031785607337952,
2114
+ "learning_rate": 0.0001064070219980713,
2115
+ "loss": 44.0548,
2116
+ "step": 245
2117
+ },
2118
+ {
2119
+ "epoch": 0.01107319608596416,
2120
+ "eval_loss": 11.020543098449707,
2121
+ "eval_runtime": 176.1913,
2122
+ "eval_samples_per_second": 52.88,
2123
+ "eval_steps_per_second": 26.443,
2124
+ "step": 245
2125
+ },
2126
+ {
2127
+ "epoch": 0.01111839280468238,
2128
+ "grad_norm": 0.4927026629447937,
2129
+ "learning_rate": 0.00010576707063353746,
2130
+ "loss": 44.0813,
2131
+ "step": 246
2132
+ },
2133
+ {
2134
+ "epoch": 0.0111635895234006,
2135
+ "grad_norm": 0.6148269772529602,
2136
+ "learning_rate": 0.00010512688220731792,
2137
+ "loss": 44.0928,
2138
+ "step": 247
2139
+ },
2140
+ {
2141
+ "epoch": 0.011208786242118822,
2142
+ "grad_norm": 0.4395325779914856,
2143
+ "learning_rate": 0.00010448648303505151,
2144
+ "loss": 44.047,
2145
+ "step": 248
2146
+ },
2147
+ {
2148
+ "epoch": 0.011253982960837043,
2149
+ "grad_norm": 0.4433494806289673,
2150
+ "learning_rate": 0.00010384589944103984,
2151
+ "loss": 44.1,
2152
+ "step": 249
2153
+ },
2154
+ {
2155
+ "epoch": 0.011299179679555264,
2156
+ "grad_norm": 0.6447661519050598,
2157
+ "learning_rate": 0.00010320515775716555,
2158
+ "loss": 44.0861,
2159
+ "step": 250
2160
+ },
2161
+ {
2162
+ "epoch": 0.011299179679555264,
2163
+ "eval_loss": 11.020323753356934,
2164
+ "eval_runtime": 176.3276,
2165
+ "eval_samples_per_second": 52.839,
2166
+ "eval_steps_per_second": 26.422,
2167
+ "step": 250
2168
  }
2169
  ],
2170
  "logging_steps": 1,
 
2193
  "attributes": {}
2194
  }
2195
  },
2196
+ "total_flos": 2626682880000.0,
2197
  "train_batch_size": 2,
2198
  "trial_name": null,
2199
  "trial_params": null