Edison committed on
Commit
9d52bf6
1 Parent(s): a6d652c

End of training

Browse files
.gitignore ADDED
@@ -0,0 +1 @@
 
 
1
+ checkpoint-*/
all_results.json ADDED
@@ -0,0 +1,18 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9230769230769231,
4
+ "eval_f1": 0.9189189189189189,
5
+ "eval_loss": 0.5037449598312378,
6
+ "eval_precision": 0.85,
7
+ "eval_recall": 1.0,
8
+ "eval_roc_auc": 0.9318181818181819,
9
+ "eval_runtime": 0.062,
10
+ "eval_samples": 39,
11
+ "eval_samples_per_second": 629.027,
12
+ "eval_steps_per_second": 48.387,
13
+ "train_loss": 0.1377350079192018,
14
+ "train_runtime": 17.676,
15
+ "train_samples": 343,
16
+ "train_samples_per_second": 194.048,
17
+ "train_steps_per_second": 12.446
18
+ }
config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "neuralsentry/distilbert-git-commits-mlm",
3
+ "activation": "gelu",
4
+ "architectures": [
5
+ "DistilBertForSequenceClassification"
6
+ ],
7
+ "attention_dropout": 0.1,
8
+ "dim": 768,
9
+ "dropout": 0.1,
10
+ "hidden_dim": 3072,
11
+ "initializer_range": 0.02,
12
+ "max_position_embeddings": 512,
13
+ "model_type": "distilbert",
14
+ "n_heads": 12,
15
+ "n_layers": 6,
16
+ "pad_token_id": 0,
17
+ "problem_type": "single_label_classification",
18
+ "qa_dropout": 0.1,
19
+ "seq_classif_dropout": 0.2,
20
+ "sinusoidal_pos_embds": false,
21
+ "tie_weights_": true,
22
+ "torch_dtype": "float32",
23
+ "transformers_version": "4.30.2",
24
+ "vocab_size": 30522
25
+ }
eval_results.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "eval_accuracy": 0.9230769230769231,
4
+ "eval_f1": 0.9189189189189189,
5
+ "eval_loss": 0.5037449598312378,
6
+ "eval_precision": 0.85,
7
+ "eval_recall": 1.0,
8
+ "eval_roc_auc": 0.9318181818181819,
9
+ "eval_runtime": 0.062,
10
+ "eval_samples": 39,
11
+ "eval_samples_per_second": 629.027,
12
+ "eval_steps_per_second": 48.387
13
+ }
pytorch_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:86eb57ae585fbb2496727a9431388d96b5f6fec8df377860193163404db0adac
3
+ size 267855533
special_tokens_map.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "cls_token": "[CLS]",
3
+ "mask_token": "[MASK]",
4
+ "pad_token": "[PAD]",
5
+ "sep_token": "[SEP]",
6
+ "unk_token": "[UNK]"
7
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,13 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "clean_up_tokenization_spaces": true,
3
+ "cls_token": "[CLS]",
4
+ "do_lower_case": true,
5
+ "mask_token": "[MASK]",
6
+ "model_max_length": 512,
7
+ "pad_token": "[PAD]",
8
+ "sep_token": "[SEP]",
9
+ "strip_accents": null,
10
+ "tokenize_chinese_chars": true,
11
+ "tokenizer_class": "DistilBertTokenizer",
12
+ "unk_token": "[UNK]"
13
+ }
train_results.json ADDED
@@ -0,0 +1,8 @@
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "epoch": 10.0,
3
+ "train_loss": 0.1377350079192018,
4
+ "train_runtime": 17.676,
5
+ "train_samples": 343,
6
+ "train_samples_per_second": 194.048,
7
+ "train_steps_per_second": 12.446
8
+ }
trainer_state.json ADDED
@@ -0,0 +1,215 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 10.0,
5
+ "global_step": 220,
6
+ "is_hyper_param_search": false,
7
+ "is_local_process_zero": true,
8
+ "is_world_process_zero": true,
9
+ "log_history": [
10
+ {
11
+ "epoch": 1.0,
12
+ "learning_rate": 9.090909090909092e-05,
13
+ "loss": 0.6837,
14
+ "step": 22
15
+ },
16
+ {
17
+ "epoch": 1.0,
18
+ "eval_accuracy": 0.5897435897435898,
19
+ "eval_f1": 0.6666666666666666,
20
+ "eval_loss": 0.6039571166038513,
21
+ "eval_precision": 0.5161290322580645,
22
+ "eval_recall": 0.9411764705882353,
23
+ "eval_roc_auc": 0.6296791443850267,
24
+ "eval_runtime": 0.069,
25
+ "eval_samples_per_second": 565.227,
26
+ "eval_steps_per_second": 43.479,
27
+ "step": 22
28
+ },
29
+ {
30
+ "epoch": 2.0,
31
+ "learning_rate": 8.090909090909092e-05,
32
+ "loss": 0.3852,
33
+ "step": 44
34
+ },
35
+ {
36
+ "epoch": 2.0,
37
+ "eval_accuracy": 0.9230769230769231,
38
+ "eval_f1": 0.9189189189189189,
39
+ "eval_loss": 0.28810474276542664,
40
+ "eval_precision": 0.85,
41
+ "eval_recall": 1.0,
42
+ "eval_roc_auc": 0.9318181818181819,
43
+ "eval_runtime": 0.064,
44
+ "eval_samples_per_second": 609.374,
45
+ "eval_steps_per_second": 46.875,
46
+ "step": 44
47
+ },
48
+ {
49
+ "epoch": 3.0,
50
+ "learning_rate": 7.090909090909092e-05,
51
+ "loss": 0.2148,
52
+ "step": 66
53
+ },
54
+ {
55
+ "epoch": 3.0,
56
+ "eval_accuracy": 0.9230769230769231,
57
+ "eval_f1": 0.9189189189189189,
58
+ "eval_loss": 0.38069406151771545,
59
+ "eval_precision": 0.85,
60
+ "eval_recall": 1.0,
61
+ "eval_roc_auc": 0.9318181818181819,
62
+ "eval_runtime": 0.065,
63
+ "eval_samples_per_second": 600.004,
64
+ "eval_steps_per_second": 46.154,
65
+ "step": 66
66
+ },
67
+ {
68
+ "epoch": 4.0,
69
+ "learning_rate": 6.090909090909091e-05,
70
+ "loss": 0.0701,
71
+ "step": 88
72
+ },
73
+ {
74
+ "epoch": 4.0,
75
+ "eval_accuracy": 0.8717948717948718,
76
+ "eval_f1": 0.8717948717948718,
77
+ "eval_loss": 0.4933823347091675,
78
+ "eval_precision": 0.7727272727272727,
79
+ "eval_recall": 1.0,
80
+ "eval_roc_auc": 0.8863636363636364,
81
+ "eval_runtime": 0.071,
82
+ "eval_samples_per_second": 549.297,
83
+ "eval_steps_per_second": 42.254,
84
+ "step": 88
85
+ },
86
+ {
87
+ "epoch": 5.0,
88
+ "learning_rate": 5.090909090909091e-05,
89
+ "loss": 0.0164,
90
+ "step": 110
91
+ },
92
+ {
93
+ "epoch": 5.0,
94
+ "eval_accuracy": 0.8974358974358975,
95
+ "eval_f1": 0.8947368421052632,
96
+ "eval_loss": 0.4892081320285797,
97
+ "eval_precision": 0.8095238095238095,
98
+ "eval_recall": 1.0,
99
+ "eval_roc_auc": 0.9090909090909091,
100
+ "eval_runtime": 0.064,
101
+ "eval_samples_per_second": 609.381,
102
+ "eval_steps_per_second": 46.875,
103
+ "step": 110
104
+ },
105
+ {
106
+ "epoch": 6.0,
107
+ "learning_rate": 4.0909090909090915e-05,
108
+ "loss": 0.0039,
109
+ "step": 132
110
+ },
111
+ {
112
+ "epoch": 6.0,
113
+ "eval_accuracy": 0.8974358974358975,
114
+ "eval_f1": 0.8947368421052632,
115
+ "eval_loss": 0.49288177490234375,
116
+ "eval_precision": 0.8095238095238095,
117
+ "eval_recall": 1.0,
118
+ "eval_roc_auc": 0.9090909090909091,
119
+ "eval_runtime": 0.058,
120
+ "eval_samples_per_second": 672.418,
121
+ "eval_steps_per_second": 51.724,
122
+ "step": 132
123
+ },
124
+ {
125
+ "epoch": 7.0,
126
+ "learning_rate": 3.090909090909091e-05,
127
+ "loss": 0.0012,
128
+ "step": 154
129
+ },
130
+ {
131
+ "epoch": 7.0,
132
+ "eval_accuracy": 0.9230769230769231,
133
+ "eval_f1": 0.9189189189189189,
134
+ "eval_loss": 0.4065372943878174,
135
+ "eval_precision": 0.85,
136
+ "eval_recall": 1.0,
137
+ "eval_roc_auc": 0.9318181818181819,
138
+ "eval_runtime": 0.063,
139
+ "eval_samples_per_second": 619.043,
140
+ "eval_steps_per_second": 47.619,
141
+ "step": 154
142
+ },
143
+ {
144
+ "epoch": 8.0,
145
+ "learning_rate": 2.090909090909091e-05,
146
+ "loss": 0.0008,
147
+ "step": 176
148
+ },
149
+ {
150
+ "epoch": 8.0,
151
+ "eval_accuracy": 0.9230769230769231,
152
+ "eval_f1": 0.9189189189189189,
153
+ "eval_loss": 0.48369330167770386,
154
+ "eval_precision": 0.85,
155
+ "eval_recall": 1.0,
156
+ "eval_roc_auc": 0.9318181818181819,
157
+ "eval_runtime": 0.059,
158
+ "eval_samples_per_second": 661.03,
159
+ "eval_steps_per_second": 50.848,
160
+ "step": 176
161
+ },
162
+ {
163
+ "epoch": 9.0,
164
+ "learning_rate": 1.0909090909090909e-05,
165
+ "loss": 0.0007,
166
+ "step": 198
167
+ },
168
+ {
169
+ "epoch": 9.0,
170
+ "eval_accuracy": 0.9230769230769231,
171
+ "eval_f1": 0.9189189189189189,
172
+ "eval_loss": 0.4999829828739166,
173
+ "eval_precision": 0.85,
174
+ "eval_recall": 1.0,
175
+ "eval_roc_auc": 0.9318181818181819,
176
+ "eval_runtime": 0.061,
177
+ "eval_samples_per_second": 639.321,
178
+ "eval_steps_per_second": 49.179,
179
+ "step": 198
180
+ },
181
+ {
182
+ "epoch": 10.0,
183
+ "learning_rate": 9.09090909090909e-07,
184
+ "loss": 0.0006,
185
+ "step": 220
186
+ },
187
+ {
188
+ "epoch": 10.0,
189
+ "eval_accuracy": 0.9230769230769231,
190
+ "eval_f1": 0.9189189189189189,
191
+ "eval_loss": 0.5037449598312378,
192
+ "eval_precision": 0.85,
193
+ "eval_recall": 1.0,
194
+ "eval_roc_auc": 0.9318181818181819,
195
+ "eval_runtime": 0.058,
196
+ "eval_samples_per_second": 672.418,
197
+ "eval_steps_per_second": 51.724,
198
+ "step": 220
199
+ },
200
+ {
201
+ "epoch": 10.0,
202
+ "step": 220,
203
+ "total_flos": 197738122089864.0,
204
+ "train_loss": 0.1377350079192018,
205
+ "train_runtime": 17.676,
206
+ "train_samples_per_second": 194.048,
207
+ "train_steps_per_second": 12.446
208
+ }
209
+ ],
210
+ "max_steps": 220,
211
+ "num_train_epochs": 10,
212
+ "total_flos": 197738122089864.0,
213
+ "trial_name": null,
214
+ "trial_params": null
215
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0fa15730fd91492ce3dbce622dd0d77268b21b86f56f2301bed4c78fb9e37cdf
3
+ size 4091
vocab.txt ADDED
The diff for this file is too large to render. See raw diff