neuralwonderland committed
Commit 23c6803 · verified · 1 Parent(s): 67f2a6a

Training in progress, step 1500, checkpoint

last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:5161521292bf1c2c1c6a94f6ef1ea578964ac98a8716df9d75c200655db406e6
+oid sha256:cc14ba3598e022e1ec0aa1f7019859b9397217ce89e0ec8c1893e3df1bd6f9e0
 size 69527352
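The checkpoint binaries in this commit are tracked with Git LFS, so each diff only swaps the pointer file: oid sha256:... is the SHA-256 digest of the actual file contents and size is its byte count, per the git-lfs spec v1 referenced in the pointer. A minimal sketch for checking a downloaded copy against the new pointer, assuming the file has already been fetched to the hypothetical local path below:

```python
import hashlib
from pathlib import Path

def lfs_sha256(path: Path, chunk_size: int = 1 << 20) -> str:
    """Stream a file and return its SHA-256 hex digest (the LFS 'oid')."""
    digest = hashlib.sha256()
    with path.open("rb") as fh:
        for chunk in iter(lambda: fh.read(chunk_size), b""):
            digest.update(chunk)
    return digest.hexdigest()

# Hypothetical local copy of the file updated in this commit.
local = Path("last-checkpoint/adapter_model.safetensors")
expected_oid = "cc14ba3598e022e1ec0aa1f7019859b9397217ce89e0ec8c1893e3df1bd6f9e0"
expected_size = 69527352

assert local.stat().st_size == expected_size, "size does not match the LFS pointer"
assert lfs_sha256(local) == expected_oid, "oid does not match the LFS pointer"
```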
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:a6701d4f71cccc16e8ae5337240ad8c4cca069668eccb68fd38f2914e22b7f26
+oid sha256:166eb816c604acd9e394c55415f278cbfb1fc88186755c3631d2c88a9cc6c698
 size 139313554
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f0f0ab9dc096e7c9b452990dd1668acf7c972cd8337ac6f8e760b0447eb61ff5
+oid sha256:7981bb1a19a9fb41a4bd7b894439da0bdd3533a923db038f02cbc626d617d8c7
 size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:05089b5f122589ed831ae5ec00bfa1d74500e2dfd86f0ab8693de9fdc4ff1bb1
+oid sha256:fc70abba463551b758dbd2d6203dfa8ce3997fcc152311258dbde48eb38a8273
 size 1256
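To fetch exactly the files introduced by this commit rather than whatever the branch head currently holds, downloads can be pinned to this revision. A sketch using huggingface_hub; the repository id is not visible on this page, so the one below is a placeholder, and only the abbreviated commit hash 23c6803 is shown here, so substitute the full hash if the short form is not accepted:

```python
from huggingface_hub import hf_hub_download

REPO_ID = "neuralwonderland/placeholder-repo"  # placeholder: actual repo id not shown in this view
REVISION = "23c6803"  # commit that added the step-1500 checkpoint (use the full hash if needed)

checkpoint_files = [
    "last-checkpoint/adapter_model.safetensors",
    "last-checkpoint/optimizer.pt",
    "last-checkpoint/rng_state.pth",
    "last-checkpoint/scheduler.pt",
    "last-checkpoint/trainer_state.json",
]

for name in checkpoint_files:
    local_path = hf_hub_download(repo_id=REPO_ID, filename=name, revision=REVISION)
    print(local_path)
```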
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
 {
-  "best_metric": 0.9194591045379639,
-  "best_model_checkpoint": "./output/checkpoint-1350",
-  "epoch": 0.1678269517652909,
+  "best_metric": 0.897224485874176,
+  "best_model_checkpoint": "./output/checkpoint-1500",
+  "epoch": 0.18647439085032322,
   "eval_steps": 150,
-  "global_step": 1350,
+  "global_step": 1500,
   "is_hyper_param_search": false,
   "is_local_process_zero": true,
   "is_world_process_zero": true,
@@ -1024,6 +1024,119 @@
       "eval_samples_per_second": 9.355,
       "eval_steps_per_second": 9.355,
       "step": 1350
+    },
+    {
+      "epoch": 0.16907011437095973,
+      "grad_norm": 1.5011839866638184,
+      "learning_rate": 0.00010569141556167905,
+      "loss": 0.6192,
+      "step": 1360
+    },
+    {
+      "epoch": 0.17031327697662854,
+      "grad_norm": 2.1147801876068115,
+      "learning_rate": 0.00010540089628071566,
+      "loss": 0.6289,
+      "step": 1370
+    },
+    {
+      "epoch": 0.17155643958229735,
+      "grad_norm": 1.8639715909957886,
+      "learning_rate": 0.00010510861351193747,
+      "loss": 0.6891,
+      "step": 1380
+    },
+    {
+      "epoch": 0.1727996021879662,
+      "grad_norm": 1.4501938819885254,
+      "learning_rate": 0.00010481457926994435,
+      "loss": 0.7117,
+      "step": 1390
+    },
+    {
+      "epoch": 0.174042764793635,
+      "grad_norm": 1.1600079536437988,
+      "learning_rate": 0.0001045188056413323,
+      "loss": 0.652,
+      "step": 1400
+    },
+    {
+      "epoch": 0.17528592739930382,
+      "grad_norm": 1.4674696922302246,
+      "learning_rate": 0.00010422130478419676,
+      "loss": 0.7558,
+      "step": 1410
+    },
+    {
+      "epoch": 0.17652909000497266,
+      "grad_norm": 1.767659068107605,
+      "learning_rate": 0.00010392208892763269,
+      "loss": 0.6438,
+      "step": 1420
+    },
+    {
+      "epoch": 0.17777225261064147,
+      "grad_norm": 1.4168020486831665,
+      "learning_rate": 0.00010362117037123204,
+      "loss": 0.6173,
+      "step": 1430
+    },
+    {
+      "epoch": 0.1790154152163103,
+      "grad_norm": 1.6601725816726685,
+      "learning_rate": 0.00010331856148457803,
+      "loss": 0.68,
+      "step": 1440
+    },
+    {
+      "epoch": 0.18025857782197913,
+      "grad_norm": 1.2710611820220947,
+      "learning_rate": 0.00010301427470673678,
+      "loss": 0.6924,
+      "step": 1450
+    },
+    {
+      "epoch": 0.18150174042764794,
+      "grad_norm": 2.1576950550079346,
+      "learning_rate": 0.00010270832254574588,
+      "loss": 0.6917,
+      "step": 1460
+    },
+    {
+      "epoch": 0.18274490303331675,
+      "grad_norm": 1.6391758918762207,
+      "learning_rate": 0.00010240071757810036,
+      "loss": 0.6717,
+      "step": 1470
+    },
+    {
+      "epoch": 0.1839880656389856,
+      "grad_norm": 1.4594990015029907,
+      "learning_rate": 0.00010209147244823564,
+      "loss": 0.7148,
+      "step": 1480
+    },
+    {
+      "epoch": 0.1852312282446544,
+      "grad_norm": 1.277106523513794,
+      "learning_rate": 0.00010178059986800773,
+      "loss": 0.6752,
+      "step": 1490
+    },
+    {
+      "epoch": 0.18647439085032322,
+      "grad_norm": 1.34278404712677,
+      "learning_rate": 0.00010146811261617085,
+      "loss": 0.7066,
+      "step": 1500
+    },
+    {
+      "epoch": 0.18647439085032322,
+      "eval_loss": 0.897224485874176,
+      "eval_runtime": 55.354,
+      "eval_samples_per_second": 9.033,
+      "eval_steps_per_second": 9.033,
+      "step": 1500
     }
   ],
   "logging_steps": 10,
@@ -1043,7 +1156,7 @@
       "attributes": {}
     }
   },
-  "total_flos": 4.691886465487872e+16,
+  "total_flos": 5.21121871179264e+16,
   "train_batch_size": 16,
   "trial_name": null,
   "trial_params": null
  "trial_params": null