besimray committed
Commit 6ed0ebc · verified · 1 parent: 9f0887a

Training in progress, step 130, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2508f90eb61b7bd9b166d1fa329b629bcde6d993f69cb3bb72b7dbc1db24df2
+oid sha256:f266404b11f932cd639e83d974de617b4287bb271af343f15c1f9f42c44ba741
 size 125048
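
Note: the checkpoint files in this commit are stored as Git LFS pointers, so the diff only touches the pointer's SHA-256 ("oid") and byte size, not the weights themselves. As a minimal sketch (not part of this commit), a locally fetched `adapter_model.safetensors` can be checked against the new pointer with nothing but the standard library; the local path is a hypothetical example.

```python
import hashlib
import os

def matches_pointer(path: str, oid: str, size: int) -> bool:
    """Return True if the file at `path` has the given SHA-256 digest and byte size."""
    if os.path.getsize(path) != size:
        return False
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(1 << 20), b""):
            h.update(chunk)
    return h.hexdigest() == oid

# Values taken from the new pointer in this diff.
print(matches_pointer(
    "last-checkpoint/adapter_model.safetensors",
    "f266404b11f932cd639e83d974de617b4287bb271af343f15c1f9f42c44ba741",
    125048,
))
```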
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:b2804e1a21b5a74c0549bffca0ed140c171b41145b99683637f8580788f846fd
+oid sha256:98b00e17d0227c53fea69ecfa2d1996d9c919078652de27239147f7c82b52cef
 size 162868
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:c8a971b3f240c33c89e901f8be5e254f5380f922f188a9eada4d5bb95507b511
+oid sha256:f8a0058f5e4eaf51e57150546d35540d117144afacffb0c679cba8d6cbe11058
 size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:7e549a35cd7e532c378c88126565a201f68fd1d73868bbbba082980ce1de2c27
+oid sha256:3511d75105f53c278279e3dade6f856082c8693b0424c0bf567bdcf23028dd2b
 size 1064
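
Together these four files are the usual Trainer checkpoint contents: `adapter_model.safetensors` holds the trained adapter tensors, while `optimizer.pt`, `scheduler.pt` and `rng_state.pth` carry the state needed to resume from step 130. A minimal sketch of inspecting them locally, assuming `safetensors` and `torch` are installed and the blobs have been fetched from LFS:

```python
from safetensors.torch import load_file
import torch

ckpt = "last-checkpoint"

# Adapter weights: a flat dict of tensor name -> tensor.
adapter = load_file(f"{ckpt}/adapter_model.safetensors")
print(sum(t.numel() for t in adapter.values()), "adapter parameters")

# Optimizer and LR-scheduler state dicts saved by the Trainer.
optimizer_state = torch.load(f"{ckpt}/optimizer.pt", map_location="cpu")
scheduler_state = torch.load(f"{ckpt}/scheduler.pt", map_location="cpu")
print(sorted(optimizer_state.keys()), sorted(scheduler_state.keys()))
```

In practice the directory is typically consumed wholesale, e.g. via `trainer.train(resume_from_checkpoint="last-checkpoint")`, rather than file by file.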
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 11.029101371765137,
-  "best_model_checkpoint": "miner_id_24/checkpoint-120",
-  "epoch": 0.005423606246186527,
+  "best_metric": 11.027961730957031,
+  "best_model_checkpoint": "miner_id_24/checkpoint-130",
+  "epoch": 0.005875573433368738,
   "eval_steps": 5,
-  "global_step": 120,
+  "global_step": 130,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1047,6 +1047,92 @@
       "eval_samples_per_second": 52.838,
       "eval_steps_per_second": 26.422,
       "step": 120
+    },
+    {
+      "epoch": 0.005468802964904748,
+      "grad_norm": 0.43057698011398315,
+      "learning_rate": 0.00017572742764761055,
+      "loss": 44.1271,
+      "step": 121
+    },
+    {
+      "epoch": 0.005513999683622969,
+      "grad_norm": 0.5054545402526855,
+      "learning_rate": 0.00017530714660036112,
+      "loss": 44.1574,
+      "step": 122
+    },
+    {
+      "epoch": 0.00555919640234119,
+      "grad_norm": 0.47395941615104675,
+      "learning_rate": 0.00017488376997127283,
+      "loss": 44.0802,
+      "step": 123
+    },
+    {
+      "epoch": 0.005604393121059411,
+      "grad_norm": 0.5438507795333862,
+      "learning_rate": 0.0001744573151637007,
+      "loss": 44.0974,
+      "step": 124
+    },
+    {
+      "epoch": 0.005649589839777632,
+      "grad_norm": 0.5694723129272461,
+      "learning_rate": 0.00017402779970753155,
+      "loss": 44.1329,
+      "step": 125
+    },
+    {
+      "epoch": 0.005649589839777632,
+      "eval_loss": 11.028435707092285,
+      "eval_runtime": 176.0545,
+      "eval_samples_per_second": 52.921,
+      "eval_steps_per_second": 26.463,
+      "step": 125
+    },
+    {
+      "epoch": 0.005694786558495853,
+      "grad_norm": 0.49188655614852905,
+      "learning_rate": 0.0001735952412584635,
+      "loss": 44.0859,
+      "step": 126
+    },
+    {
+      "epoch": 0.005739983277214074,
+      "grad_norm": 0.5955361127853394,
+      "learning_rate": 0.00017315965759728014,
+      "loss": 44.0938,
+      "step": 127
+    },
+    {
+      "epoch": 0.0057851799959322955,
+      "grad_norm": 0.4358704090118408,
+      "learning_rate": 0.00017272106662911973,
+      "loss": 44.1165,
+      "step": 128
+    },
+    {
+      "epoch": 0.005830376714650517,
+      "grad_norm": 0.4302980899810791,
+      "learning_rate": 0.00017227948638273916,
+      "loss": 44.1088,
+      "step": 129
+    },
+    {
+      "epoch": 0.005875573433368738,
+      "grad_norm": 0.5749801397323608,
+      "learning_rate": 0.00017183493500977278,
+      "loss": 44.1311,
+      "step": 130
+    },
+    {
+      "epoch": 0.005875573433368738,
+      "eval_loss": 11.027961730957031,
+      "eval_runtime": 176.2218,
+      "eval_samples_per_second": 52.871,
+      "eval_steps_per_second": 26.438,
+      "step": 130
     }
   ],
   "logging_steps": 1,
@@ -1075,7 +1161,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 1260807782400.0,
+  "total_flos": 1365875097600.0,
   "train_batch_size": 2,
   "trial_name": null,
   "trial_params": null