learn3r committed on
Commit
fe61cca
1 Parent(s): 4eaed6f

End of training

Browse files
Files changed (5) hide show
  1. README.md +3 -1
  2. all_results.json +7 -7
  3. eval_results.json +4 -4
  4. train_results.json +3 -3
  5. trainer_state.json +57 -57
README.md CHANGED
@@ -3,6 +3,8 @@ license: apache-2.0
3
  base_model: facebook/bart-base
4
  tags:
5
  - generated_from_trainer
 
 
6
  model-index:
7
  - name: summ_screen_fd_blueprint_epoch_10
8
  results: []
@@ -13,7 +15,7 @@ should probably proofread and complete it, then remove this comment. -->
13
 
14
  # summ_screen_fd_blueprint_epoch_10
15
 
16
- This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on an unknown dataset.
17
  It achieves the following results on the evaluation set:
18
  - Loss: 2.0208
19
 
 
3
  base_model: facebook/bart-base
4
  tags:
5
  - generated_from_trainer
6
+ datasets:
7
+ - learn3r/summ_screen_fd_bp
8
  model-index:
9
  - name: summ_screen_fd_blueprint_epoch_10
10
  results: []
 
15
 
16
  # summ_screen_fd_blueprint_epoch_10
17
 
18
+ This model is a fine-tuned version of [facebook/bart-base](https://huggingface.co/facebook/bart-base) on the learn3r/summ_screen_fd_bp dataset.
19
  It achieves the following results on the evaluation set:
20
  - Loss: 2.0208
21
 
all_results.json CHANGED
@@ -1,13 +1,13 @@
1
  {
2
  "epoch": 9.74,
3
- "eval_loss": 1.9589176177978516,
4
- "eval_runtime": 2.869,
5
  "eval_samples": 338,
6
- "eval_samples_per_second": 117.812,
7
- "eval_steps_per_second": 14.988,
8
- "train_loss": 2.299447972433908,
9
- "train_runtime": 1088.9846,
10
  "train_samples": 3673,
11
- "train_samples_per_second": 33.729,
12
  "train_steps_per_second": 0.129
13
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "eval_loss": 2.020766019821167,
4
+ "eval_runtime": 3.1026,
5
  "eval_samples": 338,
6
+ "eval_samples_per_second": 108.941,
7
+ "eval_steps_per_second": 13.859,
8
+ "train_loss": 2.210656370435442,
9
+ "train_runtime": 1086.7433,
10
  "train_samples": 3673,
11
+ "train_samples_per_second": 33.798,
12
  "train_steps_per_second": 0.129
13
  }
eval_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.74,
3
- "eval_loss": 1.9589176177978516,
4
- "eval_runtime": 2.869,
5
  "eval_samples": 338,
6
- "eval_samples_per_second": 117.812,
7
- "eval_steps_per_second": 14.988
8
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "eval_loss": 2.020766019821167,
4
+ "eval_runtime": 3.1026,
5
  "eval_samples": 338,
6
+ "eval_samples_per_second": 108.941,
7
+ "eval_steps_per_second": 13.859
8
  }
train_results.json CHANGED
@@ -1,8 +1,8 @@
1
  {
2
  "epoch": 9.74,
3
- "train_loss": 2.299447972433908,
4
- "train_runtime": 1088.9846,
5
  "train_samples": 3673,
6
- "train_samples_per_second": 33.729,
7
  "train_steps_per_second": 0.129
8
  }
 
1
  {
2
  "epoch": 9.74,
3
+ "train_loss": 2.210656370435442,
4
+ "train_runtime": 1086.7433,
5
  "train_samples": 3673,
6
+ "train_samples_per_second": 33.798,
7
  "train_steps_per_second": 0.129
8
  }
trainer_state.json CHANGED
@@ -10,174 +10,174 @@
10
  {
11
  "epoch": 0.7,
12
  "learning_rate": 9.285714285714286e-05,
13
- "loss": 3.519,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.97,
18
- "eval_loss": 2.3055925369262695,
19
- "eval_runtime": 3.071,
20
- "eval_samples_per_second": 110.064,
21
- "eval_steps_per_second": 14.002,
22
  "step": 14
23
  },
24
  {
25
  "epoch": 1.39,
26
  "learning_rate": 8.571428571428571e-05,
27
- "loss": 2.6644,
28
  "step": 20
29
  },
30
  {
31
  "epoch": 1.95,
32
- "eval_loss": 2.1348636150360107,
33
- "eval_runtime": 2.9701,
34
- "eval_samples_per_second": 113.801,
35
- "eval_steps_per_second": 14.478,
36
  "step": 28
37
  },
38
  {
39
  "epoch": 2.09,
40
  "learning_rate": 7.857142857142858e-05,
41
- "loss": 2.465,
42
  "step": 30
43
  },
44
  {
45
  "epoch": 2.78,
46
  "learning_rate": 7.142857142857143e-05,
47
- "loss": 2.3418,
48
  "step": 40
49
  },
50
  {
51
  "epoch": 2.99,
52
- "eval_loss": 2.0616259574890137,
53
- "eval_runtime": 2.731,
54
- "eval_samples_per_second": 123.764,
55
- "eval_steps_per_second": 15.745,
56
  "step": 43
57
  },
58
  {
59
  "epoch": 3.48,
60
  "learning_rate": 6.428571428571429e-05,
61
- "loss": 2.258,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 3.97,
66
- "eval_loss": 2.035996913909912,
67
- "eval_runtime": 2.9612,
68
- "eval_samples_per_second": 114.144,
69
- "eval_steps_per_second": 14.521,
70
  "step": 57
71
  },
72
  {
73
  "epoch": 4.17,
74
  "learning_rate": 5.714285714285714e-05,
75
- "loss": 2.2166,
76
  "step": 60
77
  },
78
  {
79
  "epoch": 4.87,
80
  "learning_rate": 5e-05,
81
- "loss": 2.169,
82
  "step": 70
83
  },
84
  {
85
  "epoch": 4.94,
86
- "eval_loss": 1.9996529817581177,
87
- "eval_runtime": 2.9471,
88
- "eval_samples_per_second": 114.69,
89
- "eval_steps_per_second": 14.591,
90
  "step": 71
91
  },
92
  {
93
  "epoch": 5.57,
94
  "learning_rate": 4.2857142857142856e-05,
95
- "loss": 2.1336,
96
  "step": 80
97
  },
98
  {
99
  "epoch": 5.98,
100
- "eval_loss": 1.986232042312622,
101
- "eval_runtime": 2.9637,
102
- "eval_samples_per_second": 114.046,
103
- "eval_steps_per_second": 14.509,
104
  "step": 86
105
  },
106
  {
107
  "epoch": 6.26,
108
  "learning_rate": 3.571428571428572e-05,
109
- "loss": 2.1131,
110
  "step": 90
111
  },
112
  {
113
  "epoch": 6.96,
114
  "learning_rate": 2.857142857142857e-05,
115
- "loss": 2.0952,
116
  "step": 100
117
  },
118
  {
119
  "epoch": 6.96,
120
- "eval_loss": 1.9753971099853516,
121
- "eval_runtime": 2.8395,
122
- "eval_samples_per_second": 119.034,
123
- "eval_steps_per_second": 15.143,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 7.65,
128
  "learning_rate": 2.1428571428571428e-05,
129
- "loss": 2.0666,
130
  "step": 110
131
  },
132
  {
133
  "epoch": 8.0,
134
- "eval_loss": 1.9659732580184937,
135
- "eval_runtime": 2.7307,
136
- "eval_samples_per_second": 123.779,
137
- "eval_steps_per_second": 15.747,
138
  "step": 115
139
  },
140
  {
141
  "epoch": 8.35,
142
  "learning_rate": 1.4285714285714285e-05,
143
- "loss": 2.0563,
144
  "step": 120
145
  },
146
  {
147
  "epoch": 8.97,
148
- "eval_loss": 1.9610240459442139,
149
- "eval_runtime": 2.8488,
150
- "eval_samples_per_second": 118.648,
151
- "eval_steps_per_second": 15.094,
152
  "step": 129
153
  },
154
  {
155
  "epoch": 9.04,
156
  "learning_rate": 7.142857142857143e-06,
157
- "loss": 2.051,
158
  "step": 130
159
  },
160
  {
161
  "epoch": 9.74,
162
  "learning_rate": 0.0,
163
- "loss": 2.0426,
164
  "step": 140
165
  },
166
  {
167
  "epoch": 9.74,
168
- "eval_loss": 1.9589176177978516,
169
- "eval_runtime": 2.9503,
170
- "eval_samples_per_second": 114.564,
171
- "eval_steps_per_second": 14.575,
172
  "step": 140
173
  },
174
  {
175
  "epoch": 9.74,
176
  "step": 140,
177
  "total_flos": 2.181454126645248e+16,
178
- "train_loss": 2.299447972433908,
179
- "train_runtime": 1088.9846,
180
- "train_samples_per_second": 33.729,
181
  "train_steps_per_second": 0.129
182
  }
183
  ],
 
10
  {
11
  "epoch": 0.7,
12
  "learning_rate": 9.285714285714286e-05,
13
+ "loss": 3.382,
14
  "step": 10
15
  },
16
  {
17
  "epoch": 0.97,
18
+ "eval_loss": 2.3120291233062744,
19
+ "eval_runtime": 3.0033,
20
+ "eval_samples_per_second": 112.543,
21
+ "eval_steps_per_second": 14.318,
22
  "step": 14
23
  },
24
  {
25
  "epoch": 1.39,
26
  "learning_rate": 8.571428571428571e-05,
27
+ "loss": 2.5354,
28
  "step": 20
29
  },
30
  {
31
  "epoch": 1.95,
32
+ "eval_loss": 2.1625144481658936,
33
+ "eval_runtime": 2.8876,
34
+ "eval_samples_per_second": 117.052,
35
+ "eval_steps_per_second": 14.891,
36
  "step": 28
37
  },
38
  {
39
  "epoch": 2.09,
40
  "learning_rate": 7.857142857142858e-05,
41
+ "loss": 2.3745,
42
  "step": 30
43
  },
44
  {
45
  "epoch": 2.78,
46
  "learning_rate": 7.142857142857143e-05,
47
+ "loss": 2.2504,
48
  "step": 40
49
  },
50
  {
51
  "epoch": 2.99,
52
+ "eval_loss": 2.103144645690918,
53
+ "eval_runtime": 2.866,
54
+ "eval_samples_per_second": 117.932,
55
+ "eval_steps_per_second": 15.003,
56
  "step": 43
57
  },
58
  {
59
  "epoch": 3.48,
60
  "learning_rate": 6.428571428571429e-05,
61
+ "loss": 2.1809,
62
  "step": 50
63
  },
64
  {
65
  "epoch": 3.97,
66
+ "eval_loss": 2.0733468532562256,
67
+ "eval_runtime": 2.6867,
68
+ "eval_samples_per_second": 125.803,
69
+ "eval_steps_per_second": 16.005,
70
  "step": 57
71
  },
72
  {
73
  "epoch": 4.17,
74
  "learning_rate": 5.714285714285714e-05,
75
+ "loss": 2.1339,
76
  "step": 60
77
  },
78
  {
79
  "epoch": 4.87,
80
  "learning_rate": 5e-05,
81
+ "loss": 2.0889,
82
  "step": 70
83
  },
84
  {
85
  "epoch": 4.94,
86
+ "eval_loss": 2.0512640476226807,
87
+ "eval_runtime": 2.8196,
88
+ "eval_samples_per_second": 119.875,
89
+ "eval_steps_per_second": 15.25,
90
  "step": 71
91
  },
92
  {
93
  "epoch": 5.57,
94
  "learning_rate": 4.2857142857142856e-05,
95
+ "loss": 2.0586,
96
  "step": 80
97
  },
98
  {
99
  "epoch": 5.98,
100
+ "eval_loss": 2.0358786582946777,
101
+ "eval_runtime": 2.6297,
102
+ "eval_samples_per_second": 128.531,
103
+ "eval_steps_per_second": 16.352,
104
  "step": 86
105
  },
106
  {
107
  "epoch": 6.26,
108
  "learning_rate": 3.571428571428572e-05,
109
+ "loss": 2.0399,
110
  "step": 90
111
  },
112
  {
113
  "epoch": 6.96,
114
  "learning_rate": 2.857142857142857e-05,
115
+ "loss": 2.0117,
116
  "step": 100
117
  },
118
  {
119
  "epoch": 6.96,
120
+ "eval_loss": 2.032505750656128,
121
+ "eval_runtime": 3.3953,
122
+ "eval_samples_per_second": 99.549,
123
+ "eval_steps_per_second": 12.665,
124
  "step": 100
125
  },
126
  {
127
  "epoch": 7.65,
128
  "learning_rate": 2.1428571428571428e-05,
129
+ "loss": 1.9849,
130
  "step": 110
131
  },
132
  {
133
  "epoch": 8.0,
134
+ "eval_loss": 2.025442600250244,
135
+ "eval_runtime": 3.0905,
136
+ "eval_samples_per_second": 109.368,
137
+ "eval_steps_per_second": 13.914,
138
  "step": 115
139
  },
140
  {
141
  "epoch": 8.35,
142
  "learning_rate": 1.4285714285714285e-05,
143
+ "loss": 1.9803,
144
  "step": 120
145
  },
146
  {
147
  "epoch": 8.97,
148
+ "eval_loss": 2.0218234062194824,
149
+ "eval_runtime": 3.3517,
150
+ "eval_samples_per_second": 100.846,
151
+ "eval_steps_per_second": 12.829,
152
  "step": 129
153
  },
154
  {
155
  "epoch": 9.04,
156
  "learning_rate": 7.142857142857143e-06,
157
+ "loss": 1.9694,
158
  "step": 130
159
  },
160
  {
161
  "epoch": 9.74,
162
  "learning_rate": 0.0,
163
+ "loss": 1.9584,
164
  "step": 140
165
  },
166
  {
167
  "epoch": 9.74,
168
+ "eval_loss": 2.020766019821167,
169
+ "eval_runtime": 2.8283,
170
+ "eval_samples_per_second": 119.505,
171
+ "eval_steps_per_second": 15.203,
172
  "step": 140
173
  },
174
  {
175
  "epoch": 9.74,
176
  "step": 140,
177
  "total_flos": 2.181454126645248e+16,
178
+ "train_loss": 2.210656370435442,
179
+ "train_runtime": 1086.7433,
180
+ "train_samples_per_second": 33.798,
181
  "train_steps_per_second": 0.129
182
  }
183
  ],