neuralwonderland commited on
Commit
b5f0da7
·
verified ·
1 Parent(s): d351d6f

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f1952cfee702785548c3cccb8349c34cbaac4b556d8ea73e0ffd0d246f429845
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4bcb6af540d25c5f5d31433cb07385ba68f10302f864604c5a1bf5979e12e620
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0f8c1c7a23540216601b312a1f8adf015dce14da9673e2c02819b7b51a5b16e4
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:40aab3db494a2674de0a1f38b3e5db81c49bd4a2f9f640a17c73293e45c2a34d
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ae503cf838012622da6ce9fc5ebd1e2dd4d76dfb7d8241850497e552b44d7e99
3
  size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:741265e26d07c3333e143245ac09223c25aeb523ed846ef93534220bcf1841a0
3
  size 14308
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c88c03c83322e1a691e866b652d06d19003ce345c89230ff72194c865749c3
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e2bb55e94fb33eeb522c3f2d754bc51aade7ec9f99182aa3065529c82c6dab5
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.40071919560432434,
3
- "best_model_checkpoint": "./output/checkpoint-1200",
4
- "epoch": 0.15266312337442045,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1024,6 +1024,119 @@
1024
  "eval_samples_per_second": 13.931,
1025
  "eval_steps_per_second": 13.931,
1026
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  }
1028
  ],
1029
  "logging_steps": 10,
@@ -1043,7 +1156,7 @@
1043
  "attributes": {}
1044
  }
1045
  },
1046
- "total_flos": 1.8419020158880973e+17,
1047
  "train_batch_size": 8,
1048
  "trial_name": null,
1049
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3870772421360016,
3
+ "best_model_checkpoint": "./output/checkpoint-1500",
4
+ "epoch": 0.16962569263824495,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1024
  "eval_samples_per_second": 13.931,
1025
  "eval_steps_per_second": 13.931,
1026
  "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 0.15379396132534207,
1030
+ "grad_norm": 3.2196218967437744,
1031
+ "learning_rate": 6.341484933700744e-06,
1032
+ "loss": 0.2629,
1033
+ "step": 1360
1034
+ },
1035
+ {
1036
+ "epoch": 0.1549247992762637,
1037
+ "grad_norm": 2.702026128768921,
1038
+ "learning_rate": 6.32405377684294e-06,
1039
+ "loss": 0.1325,
1040
+ "step": 1370
1041
+ },
1042
+ {
1043
+ "epoch": 0.15605563722718535,
1044
+ "grad_norm": 1.581925630569458,
1045
+ "learning_rate": 6.306516810716249e-06,
1046
+ "loss": 0.1481,
1047
+ "step": 1380
1048
+ },
1049
+ {
1050
+ "epoch": 0.15718647517810697,
1051
+ "grad_norm": 6.646518230438232,
1052
+ "learning_rate": 6.288874756196662e-06,
1053
+ "loss": 0.2308,
1054
+ "step": 1390
1055
+ },
1056
+ {
1057
+ "epoch": 0.1583173131290286,
1058
+ "grad_norm": 5.573264122009277,
1059
+ "learning_rate": 6.271128338479939e-06,
1060
+ "loss": 0.2235,
1061
+ "step": 1400
1062
+ },
1063
+ {
1064
+ "epoch": 0.15944815107995025,
1065
+ "grad_norm": 1.5918960571289062,
1066
+ "learning_rate": 6.253278287051806e-06,
1067
+ "loss": 0.1238,
1068
+ "step": 1410
1069
+ },
1070
+ {
1071
+ "epoch": 0.16057898903087187,
1072
+ "grad_norm": 11.685979843139648,
1073
+ "learning_rate": 6.235325335657962e-06,
1074
+ "loss": 0.1953,
1075
+ "step": 1420
1076
+ },
1077
+ {
1078
+ "epoch": 0.1617098269817935,
1079
+ "grad_norm": 1.055677890777588,
1080
+ "learning_rate": 6.217270222273923e-06,
1081
+ "loss": 0.1109,
1082
+ "step": 1430
1083
+ },
1084
+ {
1085
+ "epoch": 0.16284066493271515,
1086
+ "grad_norm": 4.812380313873291,
1087
+ "learning_rate": 6.1991136890746825e-06,
1088
+ "loss": 0.164,
1089
+ "step": 1440
1090
+ },
1091
+ {
1092
+ "epoch": 0.16397150288363677,
1093
+ "grad_norm": 3.4639365673065186,
1094
+ "learning_rate": 6.180856482404208e-06,
1095
+ "loss": 0.1479,
1096
+ "step": 1450
1097
+ },
1098
+ {
1099
+ "epoch": 0.1651023408345584,
1100
+ "grad_norm": 2.494502544403076,
1101
+ "learning_rate": 6.162499352744754e-06,
1102
+ "loss": 0.1527,
1103
+ "step": 1460
1104
+ },
1105
+ {
1106
+ "epoch": 0.16623317878548005,
1107
+ "grad_norm": 1.5618149042129517,
1108
+ "learning_rate": 6.144043054686022e-06,
1109
+ "loss": 0.1873,
1110
+ "step": 1470
1111
+ },
1112
+ {
1113
+ "epoch": 0.16736401673640167,
1114
+ "grad_norm": 2.2748749256134033,
1115
+ "learning_rate": 6.125488346894139e-06,
1116
+ "loss": 0.2023,
1117
+ "step": 1480
1118
+ },
1119
+ {
1120
+ "epoch": 0.1684948546873233,
1121
+ "grad_norm": 1.5124659538269043,
1122
+ "learning_rate": 6.106835992080464e-06,
1123
+ "loss": 0.0645,
1124
+ "step": 1490
1125
+ },
1126
+ {
1127
+ "epoch": 0.16962569263824495,
1128
+ "grad_norm": 5.185418128967285,
1129
+ "learning_rate": 6.088086756970252e-06,
1130
+ "loss": 0.1843,
1131
+ "step": 1500
1132
+ },
1133
+ {
1134
+ "epoch": 0.16962569263824495,
1135
+ "eval_loss": 0.3870772421360016,
1136
+ "eval_runtime": 35.7004,
1137
+ "eval_samples_per_second": 14.005,
1138
+ "eval_steps_per_second": 14.005,
1139
+ "step": 1500
1140
  }
1141
  ],
1142
  "logging_steps": 10,
 
1156
  "attributes": {}
1157
  }
1158
  },
1159
+ "total_flos": 2.0460281562759168e+17,
1160
  "train_batch_size": 8,
1161
  "trial_name": null,
1162
  "trial_params": null