neuralwonderland committed on
Commit
5cf7e14
·
verified ·
1 Parent(s): cb6a33f

Training in progress, step 150, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8d2559cf0a778e9840a4c37a4d46ac67ebec08e11276c08d4ebc4a1ab32841f
3
  size 319876032
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1ed692f944315bade0586154433ae05196c130d3c1a1031148af0026b3dad070
3
  size 319876032
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1768c8213f9998c620429e755fdaaddfdcee6ac18984259d54465be24333369c
3
  size 640010002
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:59581924af07ec8a919c943285b2e449aed8d904949a144c9c3a7335cf2e0414
3
  size 640010002
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:46985dbff475a9b7c7985308ad4d08ce1029dc6f59bccbb70b71ff753618d2b8
3
- size 14308
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dc13ec54115687b66c0bacabf0e2de654fcd14a9636f330067b3d04364bf3419
3
+ size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6549e0cb3283f731e5eb41433c64c6e6fa61af711d2cfd4b4a1929c1bdb5b64f
3
  size 1256
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9c01a9c9216d924473c3e5c0df9bd20a460a41ed4b43baf53c46506e9cefba51
3
  size 1256
last-checkpoint/trainer_state.json CHANGED
@@ -1,7 +1,7 @@
1
  {
2
- "best_metric": 0.5502287745475769,
3
  "best_model_checkpoint": "./output/checkpoint-150",
4
- "epoch": 0.016962569263824494,
5
  "eval_steps": 150,
6
  "global_step": 150,
7
  "is_hyper_param_search": false,
@@ -9,116 +9,116 @@
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
- "epoch": 0.0011308379509216329,
13
- "grad_norm": 3.084001064300537,
14
- "learning_rate": 7.500000000000001e-06,
15
- "loss": 1.1033,
16
  "step": 10
17
  },
18
  {
19
- "epoch": 0.0022616759018432657,
20
- "grad_norm": 1.3109848499298096,
21
- "learning_rate": 1.5000000000000002e-05,
22
- "loss": 1.3614,
23
  "step": 20
24
  },
25
  {
26
- "epoch": 0.003392513852764899,
27
- "grad_norm": 8.813097953796387,
28
- "learning_rate": 2.25e-05,
29
- "loss": 0.6036,
30
  "step": 30
31
  },
32
  {
33
- "epoch": 0.0045233518036865315,
34
- "grad_norm": 1.5215480327606201,
35
- "learning_rate": 3.0000000000000004e-05,
36
- "loss": 0.3051,
37
  "step": 40
38
  },
39
  {
40
- "epoch": 0.005654189754608165,
41
- "grad_norm": 3.0029616355895996,
42
- "learning_rate": 3.7500000000000003e-05,
43
- "loss": 0.4012,
44
  "step": 50
45
  },
46
  {
47
- "epoch": 0.006785027705529798,
48
- "grad_norm": 4.15091609954834,
49
- "learning_rate": 4.5e-05,
50
- "loss": 0.2458,
51
  "step": 60
52
  },
53
  {
54
- "epoch": 0.00791586565645143,
55
- "grad_norm": 0.9505107402801514,
56
- "learning_rate": 5.25e-05,
57
- "loss": 0.216,
58
  "step": 70
59
  },
60
  {
61
- "epoch": 0.009046703607373063,
62
- "grad_norm": 3.8529767990112305,
63
- "learning_rate": 6.000000000000001e-05,
64
- "loss": 0.2557,
65
  "step": 80
66
  },
67
  {
68
- "epoch": 0.010177541558294697,
69
- "grad_norm": 1.3274264335632324,
70
- "learning_rate": 6.750000000000001e-05,
71
- "loss": 0.2389,
72
  "step": 90
73
  },
74
  {
75
- "epoch": 0.01130837950921633,
76
- "grad_norm": 1.8785921335220337,
77
- "learning_rate": 7.500000000000001e-05,
78
- "loss": 0.2036,
79
  "step": 100
80
  },
81
  {
82
- "epoch": 0.012439217460137961,
83
- "grad_norm": 1.5390714406967163,
84
- "learning_rate": 7.499922926093873e-05,
85
- "loss": 0.2569,
86
  "step": 110
87
  },
88
  {
89
- "epoch": 0.013570055411059595,
90
- "grad_norm": 1.8701865673065186,
91
- "learning_rate": 7.499691707543698e-05,
92
- "loss": 0.1756,
93
  "step": 120
94
  },
95
  {
96
- "epoch": 0.014700893361981228,
97
- "grad_norm": 1.2455791234970093,
98
- "learning_rate": 7.499306353853963e-05,
99
- "loss": 0.3081,
100
  "step": 130
101
  },
102
  {
103
- "epoch": 0.01583173131290286,
104
- "grad_norm": 1.9865636825561523,
105
- "learning_rate": 7.49876688086505e-05,
106
- "loss": 0.3142,
107
  "step": 140
108
  },
109
  {
110
- "epoch": 0.016962569263824494,
111
- "grad_norm": 6.710127830505371,
112
- "learning_rate": 7.498073310752581e-05,
113
- "loss": 0.276,
114
  "step": 150
115
  },
116
  {
117
- "epoch": 0.016962569263824494,
118
- "eval_loss": 0.5502287745475769,
119
- "eval_runtime": 39.7817,
120
- "eval_samples_per_second": 12.569,
121
- "eval_steps_per_second": 12.569,
122
  "step": 150
123
  }
124
  ],
@@ -139,8 +139,8 @@
139
  "attributes": {}
140
  }
141
  },
142
- "total_flos": 2.092620352271155e+16,
143
- "train_batch_size": 8,
144
  "trial_name": null,
145
  "trial_params": null
146
  }
 
1
  {
2
+ "best_metric": 0.4814591705799103,
3
  "best_model_checkpoint": "./output/checkpoint-150",
4
+ "epoch": 0.008481284631912247,
5
  "eval_steps": 150,
6
  "global_step": 150,
7
  "is_hyper_param_search": false,
 
9
  "is_world_process_zero": true,
10
  "log_history": [
11
  {
12
+ "epoch": 0.0005654189754608164,
13
+ "grad_norm": 2.227701425552368,
14
+ "learning_rate": 7.500000000000001e-07,
15
+ "loss": 0.1995,
16
  "step": 10
17
  },
18
  {
19
+ "epoch": 0.0011308379509216329,
20
+ "grad_norm": 5.525130748748779,
21
+ "learning_rate": 1.5000000000000002e-06,
22
+ "loss": 0.2334,
23
  "step": 20
24
  },
25
  {
26
+ "epoch": 0.0016962569263824494,
27
+ "grad_norm": 1.4867887496948242,
28
+ "learning_rate": 2.25e-06,
29
+ "loss": 0.3133,
30
  "step": 30
31
  },
32
  {
33
+ "epoch": 0.0022616759018432657,
34
+ "grad_norm": 0.5477761626243591,
35
+ "learning_rate": 3.0000000000000005e-06,
36
+ "loss": 0.1692,
37
  "step": 40
38
  },
39
  {
40
+ "epoch": 0.0028270948773040823,
41
+ "grad_norm": 1.7914361953735352,
42
+ "learning_rate": 3.7500000000000005e-06,
43
+ "loss": 0.3082,
44
  "step": 50
45
  },
46
  {
47
+ "epoch": 0.003392513852764899,
48
+ "grad_norm": 0.40979117155075073,
49
+ "learning_rate": 4.5e-06,
50
+ "loss": 0.1468,
51
  "step": 60
52
  },
53
  {
54
+ "epoch": 0.003957932828225715,
55
+ "grad_norm": 5.27268123626709,
56
+ "learning_rate": 5.2500000000000006e-06,
57
+ "loss": 0.2381,
58
  "step": 70
59
  },
60
  {
61
+ "epoch": 0.0045233518036865315,
62
+ "grad_norm": 8.342147827148438,
63
+ "learning_rate": 6.000000000000001e-06,
64
+ "loss": 0.2292,
65
  "step": 80
66
  },
67
  {
68
+ "epoch": 0.0050887707791473485,
69
+ "grad_norm": 4.8586745262146,
70
+ "learning_rate": 6.7500000000000014e-06,
71
+ "loss": 0.1351,
72
  "step": 90
73
  },
74
  {
75
+ "epoch": 0.005654189754608165,
76
+ "grad_norm": 7.848427772521973,
77
+ "learning_rate": 7.500000000000001e-06,
78
+ "loss": 0.2967,
79
  "step": 100
80
  },
81
  {
82
+ "epoch": 0.006219608730068981,
83
+ "grad_norm": 0.6273432374000549,
84
+ "learning_rate": 7.499922926093874e-06,
85
+ "loss": 0.1445,
86
  "step": 110
87
  },
88
  {
89
+ "epoch": 0.006785027705529798,
90
+ "grad_norm": 0.21901638805866241,
91
+ "learning_rate": 7.499691707543699e-06,
92
+ "loss": 0.2228,
93
  "step": 120
94
  },
95
  {
96
+ "epoch": 0.007350446680990614,
97
+ "grad_norm": 0.5449599027633667,
98
+ "learning_rate": 7.499306353853963e-06,
99
+ "loss": 0.2675,
100
  "step": 130
101
  },
102
  {
103
+ "epoch": 0.00791586565645143,
104
+ "grad_norm": 0.5028505921363831,
105
+ "learning_rate": 7.49876688086505e-06,
106
+ "loss": 0.1786,
107
  "step": 140
108
  },
109
  {
110
+ "epoch": 0.008481284631912247,
111
+ "grad_norm": 1.582517385482788,
112
+ "learning_rate": 7.4980733107525805e-06,
113
+ "loss": 0.2159,
114
  "step": 150
115
  },
116
  {
117
+ "epoch": 0.008481284631912247,
118
+ "eval_loss": 0.4814591705799103,
119
+ "eval_runtime": 39.6064,
120
+ "eval_samples_per_second": 12.624,
121
+ "eval_steps_per_second": 12.624,
122
  "step": 150
123
  }
124
  ],
 
139
  "attributes": {}
140
  }
141
  },
142
+ "total_flos": 8916555427872768.0,
143
+ "train_batch_size": 4,
144
  "trial_name": null,
145
  "trial_params": null
146
  }
last-checkpoint/training_args.bin CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3f279fd1e1d39cc83bb0e079653c3d42ec760dfe034424991216b98360d897a4
3
  size 5496
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c342b6df7f9ed99a585d948f2c42b7aa526c41d1d12a548cf7269c01c411c5
3
  size 5496