neuralwonderland commited on
Commit
46f13c4
·
verified ·
1 Parent(s): 52360cb

Training in progress, step 1500, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:43562e1d3c082e1d78b638e7ccb2b25658e8c66a467e585cd3a377404a32286a
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6db80fccb2c3e0fe5a4dad0dd63ff03c757b4ce44797eed73312c8747ceb4721
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:83b0c79868a26e1ad237c18cc2cbbfa3d56ad8dcc31cf48ec869b6d1d0838571
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:214cffaa9c02699fb9eb500cc526ac344fea51d3dd7a6be7ed0604160917ceba
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fbdfb3a71bf80c6f97621a0be978d4a01680d20d7308ae36262f594b931b7d7
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e4c17ab7dde949a9d7155db95cb0d86b87e0ca42009494abed577a3dbf347e8
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:18c88c03c83322e1a691e866b652d06d19003ce345c89230ff72194c865749c3
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7e2bb55e94fb33eeb522c3f2d754bc51aade7ec9f99182aa3065529c82c6dab5
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
- "best_metric": 0.3694673478603363,
3
- "best_model_checkpoint": "./output/checkpoint-1350",
4
- "epoch": 0.07633156168721023,
5
  "eval_steps": 150,
6
- "global_step": 1350,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1024,6 +1024,119 @@
1024
  "eval_samples_per_second": 12.645,
1025
  "eval_steps_per_second": 12.645,
1026
  "step": 1350
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1027
  }
1028
  ],
1029
  "logging_steps": 10,
@@ -1043,7 +1156,7 @@
1043
  "attributes": {}
1044
  }
1045
  },
1046
- "total_flos": 7.711481902517453e+16,
1047
  "train_batch_size": 4,
1048
  "trial_name": null,
1049
  "trial_params": null
 
1
  {
2
+ "best_metric": 0.3590245246887207,
3
+ "best_model_checkpoint": "./output/checkpoint-1500",
4
+ "epoch": 0.08481284631912248,
5
  "eval_steps": 150,
6
+ "global_step": 1500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1024
  "eval_samples_per_second": 12.645,
1025
  "eval_steps_per_second": 12.645,
1026
  "step": 1350
1027
+ },
1028
+ {
1029
+ "epoch": 0.07689698066267103,
1030
+ "grad_norm": 4.6597371101379395,
1031
+ "learning_rate": 6.341484933700744e-06,
1032
+ "loss": 0.2264,
1033
+ "step": 1360
1034
+ },
1035
+ {
1036
+ "epoch": 0.07746239963813185,
1037
+ "grad_norm": 2.3831920623779297,
1038
+ "learning_rate": 6.32405377684294e-06,
1039
+ "loss": 0.2637,
1040
+ "step": 1370
1041
+ },
1042
+ {
1043
+ "epoch": 0.07802781861359268,
1044
+ "grad_norm": 6.223957538604736,
1045
+ "learning_rate": 6.306516810716249e-06,
1046
+ "loss": 0.262,
1047
+ "step": 1380
1048
+ },
1049
+ {
1050
+ "epoch": 0.07859323758905348,
1051
+ "grad_norm": 9.916725158691406,
1052
+ "learning_rate": 6.288874756196662e-06,
1053
+ "loss": 0.2729,
1054
+ "step": 1390
1055
+ },
1056
+ {
1057
+ "epoch": 0.0791586565645143,
1058
+ "grad_norm": 0.49764057993888855,
1059
+ "learning_rate": 6.271128338479939e-06,
1060
+ "loss": 0.2377,
1061
+ "step": 1400
1062
+ },
1063
+ {
1064
+ "epoch": 0.07972407553997513,
1065
+ "grad_norm": 2.9618566036224365,
1066
+ "learning_rate": 6.253278287051806e-06,
1067
+ "loss": 0.249,
1068
+ "step": 1410
1069
+ },
1070
+ {
1071
+ "epoch": 0.08028949451543593,
1072
+ "grad_norm": 2.368474006652832,
1073
+ "learning_rate": 6.235325335657962e-06,
1074
+ "loss": 0.124,
1075
+ "step": 1420
1076
+ },
1077
+ {
1078
+ "epoch": 0.08085491349089675,
1079
+ "grad_norm": 0.38136398792266846,
1080
+ "learning_rate": 6.217270222273923e-06,
1081
+ "loss": 0.2674,
1082
+ "step": 1430
1083
+ },
1084
+ {
1085
+ "epoch": 0.08142033246635758,
1086
+ "grad_norm": 4.6476898193359375,
1087
+ "learning_rate": 6.1991136890746825e-06,
1088
+ "loss": 0.1299,
1089
+ "step": 1440
1090
+ },
1091
+ {
1092
+ "epoch": 0.08198575144181838,
1093
+ "grad_norm": 1.214414119720459,
1094
+ "learning_rate": 6.180856482404208e-06,
1095
+ "loss": 0.2702,
1096
+ "step": 1450
1097
+ },
1098
+ {
1099
+ "epoch": 0.0825511704172792,
1100
+ "grad_norm": 7.3671464920043945,
1101
+ "learning_rate": 6.162499352744754e-06,
1102
+ "loss": 0.2172,
1103
+ "step": 1460
1104
+ },
1105
+ {
1106
+ "epoch": 0.08311658939274003,
1107
+ "grad_norm": 0.29375457763671875,
1108
+ "learning_rate": 6.144043054686022e-06,
1109
+ "loss": 0.1906,
1110
+ "step": 1470
1111
+ },
1112
+ {
1113
+ "epoch": 0.08368200836820083,
1114
+ "grad_norm": 1.2316617965698242,
1115
+ "learning_rate": 6.125488346894139e-06,
1116
+ "loss": 0.2524,
1117
+ "step": 1480
1118
+ },
1119
+ {
1120
+ "epoch": 0.08424742734366165,
1121
+ "grad_norm": 4.17201566696167,
1122
+ "learning_rate": 6.106835992080464e-06,
1123
+ "loss": 0.2358,
1124
+ "step": 1490
1125
+ },
1126
+ {
1127
+ "epoch": 0.08481284631912248,
1128
+ "grad_norm": 0.6424977779388428,
1129
+ "learning_rate": 6.088086756970252e-06,
1130
+ "loss": 0.2243,
1131
+ "step": 1500
1132
+ },
1133
+ {
1134
+ "epoch": 0.08481284631912248,
1135
+ "eval_loss": 0.3590245246887207,
1136
+ "eval_runtime": 39.5277,
1137
+ "eval_samples_per_second": 12.649,
1138
+ "eval_steps_per_second": 12.649,
1139
+ "step": 1500
1140
  }
1141
  ],
1142
  "logging_steps": 10,
 
1156
  "attributes": {}
1157
  }
1158
  },
1159
+ "total_flos": 8.56989857589166e+16,
1160
  "train_batch_size": 4,
1161
  "trial_name": null,
1162
  "trial_params": null