besimray commited on
Commit
569ac23
·
verified ·
1 Parent(s): 4bb2e0c

Training in progress, step 240, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:28ca0f5984758b0a6bc98d2b9fa3a918322dfc9def650fc2cedeb7b80033ea95
3
  size 125048
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0288bde0e0adee15a0cc1db17a7449a8eca313277f1482d1ff0cfd7cab7bec74
3
  size 125048
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:206af652e7f3377e6c0aecfa1c36bcdf508e6b891b0a50e079a3425838b06728
3
  size 162868
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b4f2703fce501b87978e19b3c7bea6d1358a28ff7aa7dadc828ac8570470d59c
3
  size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c41bdc40b014e3f55ca8563ea9de673fa395c6cc93a6cb821cfa7f874d5565c3
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89ab1cc6b91f586f7f2f0ee2f276603d7276cb565be5c7500e1ed6a5a0584bb2
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95105769f4fba521c26686a89d84be3ea680f62dc39a9c9254e214e0a5a6bc5e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:161da1490be78a40723a580aa2c4f8fdf6c7186d93d25be9a77bba0a93a1a4c9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 11.021036148071289,
3
- "best_model_checkpoint": "miner_id_24/checkpoint-230",
4
- "epoch": 0.010395245305190844,
5
  "eval_steps": 5,
6
- "global_step": 230,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1993,6 +1993,92 @@
1993
  "eval_samples_per_second": 52.862,
1994
  "eval_steps_per_second": 26.434,
1995
  "step": 230
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1996
  }
1997
  ],
1998
  "logging_steps": 1,
@@ -2021,7 +2107,7 @@
2021
  "attributes": {}
2022
  }
2023
  },
2024
- "total_flos": 2416548249600.0,
2025
  "train_batch_size": 2,
2026
  "trial_name": null,
2027
  "trial_params": null
 
1
  {
2
+ "best_metric": 11.020767211914062,
3
+ "best_model_checkpoint": "miner_id_24/checkpoint-240",
4
+ "epoch": 0.010847212492373054,
5
  "eval_steps": 5,
6
+ "global_step": 240,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1993
  "eval_samples_per_second": 52.862,
1994
  "eval_steps_per_second": 26.434,
1995
  "step": 230
1996
+ },
1997
+ {
1998
+ "epoch": 0.010440442023909063,
1999
+ "grad_norm": 0.4731612503528595,
2000
+ "learning_rate": 0.00011532674274409159,
2001
+ "loss": 44.1151,
2002
+ "step": 231
2003
+ },
2004
+ {
2005
+ "epoch": 0.010485638742627285,
2006
+ "grad_norm": 0.47020676732063293,
2007
+ "learning_rate": 0.00011469286596248181,
2008
+ "loss": 44.0772,
2009
+ "step": 232
2010
+ },
2011
+ {
2012
+ "epoch": 0.010530835461345506,
2013
+ "grad_norm": 0.4738229215145111,
2014
+ "learning_rate": 0.00011405838521470029,
2015
+ "loss": 44.1274,
2016
+ "step": 233
2017
+ },
2018
+ {
2019
+ "epoch": 0.010576032180063727,
2020
+ "grad_norm": 0.5980152487754822,
2021
+ "learning_rate": 0.00011342332658176555,
2022
+ "loss": 44.0543,
2023
+ "step": 234
2024
+ },
2025
+ {
2026
+ "epoch": 0.010621228898781948,
2027
+ "grad_norm": 0.45920702815055847,
2028
+ "learning_rate": 0.00011278771616845061,
2029
+ "loss": 44.0846,
2030
+ "step": 235
2031
+ },
2032
+ {
2033
+ "epoch": 0.010621228898781948,
2034
+ "eval_loss": 11.02093505859375,
2035
+ "eval_runtime": 176.025,
2036
+ "eval_samples_per_second": 52.93,
2037
+ "eval_steps_per_second": 26.468,
2038
+ "step": 235
2039
+ },
2040
+ {
2041
+ "epoch": 0.01066642561750017,
2042
+ "grad_norm": 0.48931440711021423,
2043
+ "learning_rate": 0.00011215158010221005,
2044
+ "loss": 44.0991,
2045
+ "step": 236
2046
+ },
2047
+ {
2048
+ "epoch": 0.01071162233621839,
2049
+ "grad_norm": 0.4345873296260834,
2050
+ "learning_rate": 0.00011151494453210596,
2051
+ "loss": 44.0491,
2052
+ "step": 237
2053
+ },
2054
+ {
2055
+ "epoch": 0.010756819054936612,
2056
+ "grad_norm": 0.43655380606651306,
2057
+ "learning_rate": 0.00011087783562773311,
2058
+ "loss": 44.0903,
2059
+ "step": 238
2060
+ },
2061
+ {
2062
+ "epoch": 0.010802015773654833,
2063
+ "grad_norm": 0.616533637046814,
2064
+ "learning_rate": 0.00011024027957814314,
2065
+ "loss": 44.1318,
2066
+ "step": 239
2067
+ },
2068
+ {
2069
+ "epoch": 0.010847212492373054,
2070
+ "grad_norm": 0.45536908507347107,
2071
+ "learning_rate": 0.00010960230259076818,
2072
+ "loss": 44.0812,
2073
+ "step": 240
2074
+ },
2075
+ {
2076
+ "epoch": 0.010847212492373054,
2077
+ "eval_loss": 11.020767211914062,
2078
+ "eval_runtime": 176.3636,
2079
+ "eval_samples_per_second": 52.828,
2080
+ "eval_steps_per_second": 26.417,
2081
+ "step": 240
2082
  }
2083
  ],
2084
  "logging_steps": 1,
 
2107
  "attributes": {}
2108
  }
2109
  },
2110
+ "total_flos": 2521615564800.0,
2111
  "train_batch_size": 2,
2112
  "trial_name": null,
2113
  "trial_params": null