Krisbiantoro committed on
Commit
1fa4da1
·
1 Parent(s): 072dc71

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,21 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ library_name: peft
3
+ ---
4
+ ## Training procedure
5
+
6
+
7
+ The following `bitsandbytes` quantization config was used during training:
8
+ - quant_method: bitsandbytes
9
+ - load_in_8bit: False
10
+ - load_in_4bit: True
11
+ - llm_int8_threshold: 6.0
12
+ - llm_int8_skip_modules: None
13
+ - llm_int8_enable_fp32_cpu_offload: False
14
+ - llm_int8_has_fp16_weight: False
15
+ - bnb_4bit_quant_type: fp4
16
+ - bnb_4bit_use_double_quant: False
17
+ - bnb_4bit_compute_dtype: float32
18
+ ### Framework versions
19
+
20
+
21
+ - PEFT 0.5.0
adapter_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "auto_mapping": null,
3
+ "base_model_name_or_path": null,
4
+ "bias": "none",
5
+ "fan_in_fan_out": false,
6
+ "inference_mode": true,
7
+ "init_lora_weights": true,
8
+ "layers_pattern": null,
9
+ "layers_to_transform": null,
10
+ "lora_alpha": 16,
11
+ "lora_dropout": 0.05,
12
+ "modules_to_save": null,
13
+ "peft_type": "LORA",
14
+ "r": 8,
15
+ "revision": null,
16
+ "target_modules": [
17
+ "q_proj",
18
+ "k_proj",
19
+ "v_proj",
20
+ "gate_proj",
21
+ "up_proj",
22
+ "down_proj"
23
+ ],
24
+ "task_type": "CAUSAL_LM"
25
+ }
adapter_model.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7fb41ed248b38a0d5c018c6a8583bdfba80b6d2fa606506f6813c43958a99bbf
3
+ size 75641741
optimizer.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:446b771342b713e07f4acb65267ef0679ef125566f0de980a5ac69b427e4f799
3
+ size 151224453
rng_state.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:972139d83957a9cf2600cb6eeca17287d7a5377c33a53500ae7e13fe830ad36b
3
+ size 14575
scheduler.pt ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f03c5a9d9fd80414287e17e83b0d9b80cfdcf5ad7a4a9d63da800e7a44f10384
3
+ size 627
special_tokens_map.json ADDED
@@ -0,0 +1,11 @@
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<unk>",
4
+ "<s>",
5
+ "</s>"
6
+ ],
7
+ "bos_token": "<s>",
8
+ "eos_token": "</s>",
9
+ "pad_token": "</s>",
10
+ "unk_token": "<unk>"
11
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,44 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "added_tokens_decoder": {
3
+ "0": {
4
+ "content": "<unk>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false,
9
+ "special": true
10
+ },
11
+ "1": {
12
+ "content": "<s>",
13
+ "lstrip": false,
14
+ "normalized": false,
15
+ "rstrip": false,
16
+ "single_word": false,
17
+ "special": true
18
+ },
19
+ "2": {
20
+ "content": "</s>",
21
+ "lstrip": false,
22
+ "normalized": false,
23
+ "rstrip": false,
24
+ "single_word": false,
25
+ "special": true
26
+ }
27
+ },
28
+ "additional_special_tokens": [
29
+ "<unk>",
30
+ "<s>",
31
+ "</s>"
32
+ ],
33
+ "bos_token": "<s>",
34
+ "clean_up_tokenization_spaces": false,
35
+ "eos_token": "</s>",
36
+ "legacy": true,
37
+ "model_max_length": 1000000000000000019884624838656,
38
+ "pad_token": "</s>",
39
+ "sp_model_kwargs": {},
40
+ "spaces_between_special_tokens": false,
41
+ "tokenizer_class": "LlamaTokenizer",
42
+ "unk_token": "<unk>",
43
+ "use_default_system_prompt": true
44
+ }
trainer_state.json ADDED
@@ -0,0 +1,751 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "best_metric": null,
3
+ "best_model_checkpoint": null,
4
+ "epoch": 0.09665104141497124,
5
+ "eval_steps": 200,
6
+ "global_step": 500,
7
+ "is_hyper_param_search": false,
8
+ "is_local_process_zero": true,
9
+ "is_world_process_zero": true,
10
+ "log_history": [
11
+ {
12
+ "epoch": 0.0,
13
+ "learning_rate": 9.652509652509653e-06,
14
+ "logits/chosen": -3.11246657371521,
15
+ "logits/rejected": -3.086373805999756,
16
+ "logps/chosen": -113.73238372802734,
17
+ "logps/rejected": -109.32698822021484,
18
+ "loss": 0.721,
19
+ "rewards/accuracies": 0.44999998807907104,
20
+ "rewards/chosen": -0.2993558943271637,
21
+ "rewards/margins": -0.0277109295129776,
22
+ "rewards/rejected": -0.2716449797153473,
23
+ "step": 10
24
+ },
25
+ {
26
+ "epoch": 0.0,
27
+ "learning_rate": 1.9305019305019306e-05,
28
+ "logits/chosen": -3.110931873321533,
29
+ "logits/rejected": -3.1171531677246094,
30
+ "logps/chosen": -122.4663314819336,
31
+ "logps/rejected": -113.23054504394531,
32
+ "loss": 0.7161,
33
+ "rewards/accuracies": 0.42500001192092896,
34
+ "rewards/chosen": -0.2805718183517456,
35
+ "rewards/margins": -0.03167964145541191,
36
+ "rewards/rejected": -0.2488921880722046,
37
+ "step": 20
38
+ },
39
+ {
40
+ "epoch": 0.01,
41
+ "learning_rate": 2.895752895752896e-05,
42
+ "logits/chosen": -3.139052629470825,
43
+ "logits/rejected": -3.1156527996063232,
44
+ "logps/chosen": -126.01689147949219,
45
+ "logps/rejected": -100.77046203613281,
46
+ "loss": 0.7224,
47
+ "rewards/accuracies": 0.4625000059604645,
48
+ "rewards/chosen": -0.3138067126274109,
49
+ "rewards/margins": -0.038806475698947906,
50
+ "rewards/rejected": -0.2750001847743988,
51
+ "step": 30
52
+ },
53
+ {
54
+ "epoch": 0.01,
55
+ "learning_rate": 3.764478764478765e-05,
56
+ "logits/chosen": -3.155150890350342,
57
+ "logits/rejected": -3.1715970039367676,
58
+ "logps/chosen": -133.27737426757812,
59
+ "logps/rejected": -118.9439926147461,
60
+ "loss": 0.713,
61
+ "rewards/accuracies": 0.4625000059604645,
62
+ "rewards/chosen": -0.2480003386735916,
63
+ "rewards/margins": -0.023857835680246353,
64
+ "rewards/rejected": -0.22414250671863556,
65
+ "step": 40
66
+ },
67
+ {
68
+ "epoch": 0.01,
69
+ "learning_rate": 4.72972972972973e-05,
70
+ "logits/chosen": -3.1669116020202637,
71
+ "logits/rejected": -3.1525278091430664,
72
+ "logps/chosen": -123.1195297241211,
73
+ "logps/rejected": -128.38714599609375,
74
+ "loss": 0.6781,
75
+ "rewards/accuracies": 0.6000000238418579,
76
+ "rewards/chosen": -0.19161827862262726,
77
+ "rewards/margins": 0.04476013034582138,
78
+ "rewards/rejected": -0.23637838661670685,
79
+ "step": 50
80
+ },
81
+ {
82
+ "epoch": 0.01,
83
+ "learning_rate": 5.694980694980695e-05,
84
+ "logits/chosen": -3.1426501274108887,
85
+ "logits/rejected": -3.132570266723633,
86
+ "logps/chosen": -120.64261627197266,
87
+ "logps/rejected": -113.0268783569336,
88
+ "loss": 0.7107,
89
+ "rewards/accuracies": 0.4749999940395355,
90
+ "rewards/chosen": -0.3047412037849426,
91
+ "rewards/margins": -0.014080168679356575,
92
+ "rewards/rejected": -0.2906610071659088,
93
+ "step": 60
94
+ },
95
+ {
96
+ "epoch": 0.01,
97
+ "learning_rate": 6.660231660231661e-05,
98
+ "logits/chosen": -3.170804500579834,
99
+ "logits/rejected": -3.164586305618286,
100
+ "logps/chosen": -116.2149887084961,
101
+ "logps/rejected": -126.68898010253906,
102
+ "loss": 0.6885,
103
+ "rewards/accuracies": 0.5249999761581421,
104
+ "rewards/chosen": -0.33613839745521545,
105
+ "rewards/margins": 0.028235793113708496,
106
+ "rewards/rejected": -0.36437422037124634,
107
+ "step": 70
108
+ },
109
+ {
110
+ "epoch": 0.02,
111
+ "learning_rate": 7.625482625482626e-05,
112
+ "logits/chosen": -3.17895245552063,
113
+ "logits/rejected": -3.1590020656585693,
114
+ "logps/chosen": -126.33811950683594,
115
+ "logps/rejected": -103.02183532714844,
116
+ "loss": 0.6215,
117
+ "rewards/accuracies": 0.675000011920929,
118
+ "rewards/chosen": -0.4042009711265564,
119
+ "rewards/margins": 0.19598612189292908,
120
+ "rewards/rejected": -0.6001870632171631,
121
+ "step": 80
122
+ },
123
+ {
124
+ "epoch": 0.02,
125
+ "learning_rate": 8.59073359073359e-05,
126
+ "logits/chosen": -3.180785894393921,
127
+ "logits/rejected": -3.1576857566833496,
128
+ "logps/chosen": -129.47866821289062,
129
+ "logps/rejected": -126.99539947509766,
130
+ "loss": 0.6889,
131
+ "rewards/accuracies": 0.5249999761581421,
132
+ "rewards/chosen": -0.8047823905944824,
133
+ "rewards/margins": 0.08432246744632721,
134
+ "rewards/rejected": -0.8891048431396484,
135
+ "step": 90
136
+ },
137
+ {
138
+ "epoch": 0.02,
139
+ "learning_rate": 9.555984555984557e-05,
140
+ "logits/chosen": -3.139349937438965,
141
+ "logits/rejected": -3.114441394805908,
142
+ "logps/chosen": -139.87002563476562,
143
+ "logps/rejected": -135.06802368164062,
144
+ "loss": 0.6326,
145
+ "rewards/accuracies": 0.6625000238418579,
146
+ "rewards/chosen": -1.2678108215332031,
147
+ "rewards/margins": 0.2349836528301239,
148
+ "rewards/rejected": -1.5027945041656494,
149
+ "step": 100
150
+ },
151
+ {
152
+ "epoch": 0.02,
153
+ "learning_rate": 0.00010424710424710426,
154
+ "logits/chosen": -3.155695676803589,
155
+ "logits/rejected": -3.1192359924316406,
156
+ "logps/chosen": -126.76655578613281,
157
+ "logps/rejected": -119.91800689697266,
158
+ "loss": 0.6925,
159
+ "rewards/accuracies": 0.5874999761581421,
160
+ "rewards/chosen": -1.1118037700653076,
161
+ "rewards/margins": 0.12902173399925232,
162
+ "rewards/rejected": -1.2408255338668823,
163
+ "step": 110
164
+ },
165
+ {
166
+ "epoch": 0.02,
167
+ "learning_rate": 0.0001138996138996139,
168
+ "logits/chosen": -3.203996181488037,
169
+ "logits/rejected": -3.181511402130127,
170
+ "logps/chosen": -121.38505554199219,
171
+ "logps/rejected": -123.63691711425781,
172
+ "loss": 0.6905,
173
+ "rewards/accuracies": 0.5625,
174
+ "rewards/chosen": -0.83983314037323,
175
+ "rewards/margins": 0.14224112033843994,
176
+ "rewards/rejected": -0.9820743799209595,
177
+ "step": 120
178
+ },
179
+ {
180
+ "epoch": 0.03,
181
+ "learning_rate": 0.00012355212355212355,
182
+ "logits/chosen": -3.199700355529785,
183
+ "logits/rejected": -3.142176628112793,
184
+ "logps/chosen": -144.78311157226562,
185
+ "logps/rejected": -131.46128845214844,
186
+ "loss": 0.7421,
187
+ "rewards/accuracies": 0.550000011920929,
188
+ "rewards/chosen": -0.7906621694564819,
189
+ "rewards/margins": 0.08599194139242172,
190
+ "rewards/rejected": -0.8766541481018066,
191
+ "step": 130
192
+ },
193
+ {
194
+ "epoch": 0.03,
195
+ "learning_rate": 0.00013223938223938227,
196
+ "logits/chosen": -3.11432147026062,
197
+ "logits/rejected": -3.0819637775421143,
198
+ "logps/chosen": -131.96109008789062,
199
+ "logps/rejected": -118.2151107788086,
200
+ "loss": 0.7438,
201
+ "rewards/accuracies": 0.5249999761581421,
202
+ "rewards/chosen": -0.9345771074295044,
203
+ "rewards/margins": 0.03206203132867813,
204
+ "rewards/rejected": -0.9666391611099243,
205
+ "step": 140
206
+ },
207
+ {
208
+ "epoch": 0.03,
209
+ "learning_rate": 0.00014189189189189188,
210
+ "logits/chosen": -3.103154182434082,
211
+ "logits/rejected": -3.0087850093841553,
212
+ "logps/chosen": -111.38960266113281,
213
+ "logps/rejected": -109.3032455444336,
214
+ "loss": 0.6658,
215
+ "rewards/accuracies": 0.625,
216
+ "rewards/chosen": -0.4318141043186188,
217
+ "rewards/margins": 0.15236088633537292,
218
+ "rewards/rejected": -0.5841749906539917,
219
+ "step": 150
220
+ },
221
+ {
222
+ "epoch": 0.03,
223
+ "learning_rate": 0.00015154440154440155,
224
+ "logits/chosen": -3.065882444381714,
225
+ "logits/rejected": -3.014258623123169,
226
+ "logps/chosen": -115.94306945800781,
227
+ "logps/rejected": -129.7348175048828,
228
+ "loss": 0.7302,
229
+ "rewards/accuracies": 0.5625,
230
+ "rewards/chosen": -0.6290556192398071,
231
+ "rewards/margins": 0.06416401267051697,
232
+ "rewards/rejected": -0.6932196021080017,
233
+ "step": 160
234
+ },
235
+ {
236
+ "epoch": 0.03,
237
+ "learning_rate": 0.0001611969111969112,
238
+ "logits/chosen": -2.8696858882904053,
239
+ "logits/rejected": -2.820652723312378,
240
+ "logps/chosen": -121.01307678222656,
241
+ "logps/rejected": -122.356201171875,
242
+ "loss": 0.7221,
243
+ "rewards/accuracies": 0.5249999761581421,
244
+ "rewards/chosen": -0.7266199588775635,
245
+ "rewards/margins": 0.14146149158477783,
246
+ "rewards/rejected": -0.8680814504623413,
247
+ "step": 170
248
+ },
249
+ {
250
+ "epoch": 0.03,
251
+ "learning_rate": 0.00017084942084942084,
252
+ "logits/chosen": -2.89375638961792,
253
+ "logits/rejected": -2.8223800659179688,
254
+ "logps/chosen": -131.37777709960938,
255
+ "logps/rejected": -125.69004821777344,
256
+ "loss": 0.583,
257
+ "rewards/accuracies": 0.699999988079071,
258
+ "rewards/chosen": -0.9062817692756653,
259
+ "rewards/margins": 0.5299333333969116,
260
+ "rewards/rejected": -1.4362150430679321,
261
+ "step": 180
262
+ },
263
+ {
264
+ "epoch": 0.04,
265
+ "learning_rate": 0.0001805019305019305,
266
+ "logits/chosen": -2.8086953163146973,
267
+ "logits/rejected": -2.8810436725616455,
268
+ "logps/chosen": -115.71038818359375,
269
+ "logps/rejected": -133.2216339111328,
270
+ "loss": 0.7593,
271
+ "rewards/accuracies": 0.5625,
272
+ "rewards/chosen": -1.390483021736145,
273
+ "rewards/margins": 0.12736426293849945,
274
+ "rewards/rejected": -1.5178472995758057,
275
+ "step": 190
276
+ },
277
+ {
278
+ "epoch": 0.04,
279
+ "learning_rate": 0.00019015444015444015,
280
+ "logits/chosen": -3.0167882442474365,
281
+ "logits/rejected": -2.996938943862915,
282
+ "logps/chosen": -118.67408752441406,
283
+ "logps/rejected": -106.15169525146484,
284
+ "loss": 0.672,
285
+ "rewards/accuracies": 0.5874999761581421,
286
+ "rewards/chosen": -0.7122364640235901,
287
+ "rewards/margins": 0.25746825337409973,
288
+ "rewards/rejected": -0.9697047472000122,
289
+ "step": 200
290
+ },
291
+ {
292
+ "epoch": 0.04,
293
+ "eval_logits/chosen": -3.1546952724456787,
294
+ "eval_logits/rejected": -3.1338424682617188,
295
+ "eval_logps/chosen": -127.55575561523438,
296
+ "eval_logps/rejected": -128.7761993408203,
297
+ "eval_loss": 0.743442177772522,
298
+ "eval_rewards/accuracies": 0.5858798623085022,
299
+ "eval_rewards/chosen": -1.0754988193511963,
300
+ "eval_rewards/margins": 0.3047899305820465,
301
+ "eval_rewards/rejected": -1.3802887201309204,
302
+ "eval_runtime": 1335.2681,
303
+ "eval_samples_per_second": 0.711,
304
+ "eval_steps_per_second": 0.711,
305
+ "step": 200
306
+ },
307
+ {
308
+ "epoch": 0.04,
309
+ "learning_rate": 0.0001998069498069498,
310
+ "logits/chosen": -3.2106406688690186,
311
+ "logits/rejected": -3.1746304035186768,
312
+ "logps/chosen": -131.38128662109375,
313
+ "logps/rejected": -121.36234283447266,
314
+ "loss": 0.664,
315
+ "rewards/accuracies": 0.637499988079071,
316
+ "rewards/chosen": -1.005927324295044,
317
+ "rewards/margins": 0.5132917165756226,
318
+ "rewards/rejected": -1.5192190408706665,
319
+ "step": 210
320
+ },
321
+ {
322
+ "epoch": 0.04,
323
+ "learning_rate": 0.00020945945945945947,
324
+ "logits/chosen": -3.2851333618164062,
325
+ "logits/rejected": -3.267256259918213,
326
+ "logps/chosen": -120.11392974853516,
327
+ "logps/rejected": -117.89964294433594,
328
+ "loss": 0.6965,
329
+ "rewards/accuracies": 0.612500011920929,
330
+ "rewards/chosen": -0.7865055203437805,
331
+ "rewards/margins": 0.19706687331199646,
332
+ "rewards/rejected": -0.9835723638534546,
333
+ "step": 220
334
+ },
335
+ {
336
+ "epoch": 0.04,
337
+ "learning_rate": 0.0002191119691119691,
338
+ "logits/chosen": -3.408318281173706,
339
+ "logits/rejected": -3.351940870285034,
340
+ "logps/chosen": -107.76014709472656,
341
+ "logps/rejected": -107.02482604980469,
342
+ "loss": 0.7385,
343
+ "rewards/accuracies": 0.512499988079071,
344
+ "rewards/chosen": -0.41882553696632385,
345
+ "rewards/margins": 0.01567765511572361,
346
+ "rewards/rejected": -0.4345032274723053,
347
+ "step": 230
348
+ },
349
+ {
350
+ "epoch": 0.05,
351
+ "learning_rate": 0.00022876447876447875,
352
+ "logits/chosen": -3.155478000640869,
353
+ "logits/rejected": -3.135077953338623,
354
+ "logps/chosen": -131.94711303710938,
355
+ "logps/rejected": -112.65836334228516,
356
+ "loss": 0.7328,
357
+ "rewards/accuracies": 0.5375000238418579,
358
+ "rewards/chosen": -0.6660552024841309,
359
+ "rewards/margins": 0.10303208976984024,
360
+ "rewards/rejected": -0.7690872550010681,
361
+ "step": 240
362
+ },
363
+ {
364
+ "epoch": 0.05,
365
+ "learning_rate": 0.00023841698841698842,
366
+ "logits/chosen": -3.290839433670044,
367
+ "logits/rejected": -3.2837767601013184,
368
+ "logps/chosen": -132.83676147460938,
369
+ "logps/rejected": -119.4383773803711,
370
+ "loss": 0.678,
371
+ "rewards/accuracies": 0.5874999761581421,
372
+ "rewards/chosen": -0.9368747472763062,
373
+ "rewards/margins": 0.25725504755973816,
374
+ "rewards/rejected": -1.1941298246383667,
375
+ "step": 250
376
+ },
377
+ {
378
+ "epoch": 0.05,
379
+ "learning_rate": 0.0002480694980694981,
380
+ "logits/chosen": -3.3188934326171875,
381
+ "logits/rejected": -3.361811876296997,
382
+ "logps/chosen": -130.7174835205078,
383
+ "logps/rejected": -147.4563751220703,
384
+ "loss": 0.7069,
385
+ "rewards/accuracies": 0.574999988079071,
386
+ "rewards/chosen": -0.9200389981269836,
387
+ "rewards/margins": 0.4393937587738037,
388
+ "rewards/rejected": -1.3594326972961426,
389
+ "step": 260
390
+ },
391
+ {
392
+ "epoch": 0.05,
393
+ "learning_rate": 0.0002567567567567567,
394
+ "logits/chosen": -3.249516010284424,
395
+ "logits/rejected": -3.2256407737731934,
396
+ "logps/chosen": -108.8515853881836,
397
+ "logps/rejected": -132.24813842773438,
398
+ "loss": 0.6983,
399
+ "rewards/accuracies": 0.612500011920929,
400
+ "rewards/chosen": -0.42829591035842896,
401
+ "rewards/margins": 0.3723019063472748,
402
+ "rewards/rejected": -0.8005977869033813,
403
+ "step": 270
404
+ },
405
+ {
406
+ "epoch": 0.05,
407
+ "learning_rate": 0.0002635135135135135,
408
+ "logits/chosen": -3.0320539474487305,
409
+ "logits/rejected": -2.982640027999878,
410
+ "logps/chosen": -175.3157501220703,
411
+ "logps/rejected": -154.48963928222656,
412
+ "loss": 1.4131,
413
+ "rewards/accuracies": 0.512499988079071,
414
+ "rewards/chosen": -2.973146915435791,
415
+ "rewards/margins": -0.0525052547454834,
416
+ "rewards/rejected": -2.9206414222717285,
417
+ "step": 280
418
+ },
419
+ {
420
+ "epoch": 0.06,
421
+ "learning_rate": 0.00027316602316602317,
422
+ "logits/chosen": -2.9082584381103516,
423
+ "logits/rejected": -2.8451313972473145,
424
+ "logps/chosen": -129.400146484375,
425
+ "logps/rejected": -118.70863342285156,
426
+ "loss": 0.9237,
427
+ "rewards/accuracies": 0.48750001192092896,
428
+ "rewards/chosen": -1.8987582921981812,
429
+ "rewards/margins": 0.024892251938581467,
430
+ "rewards/rejected": -1.9236505031585693,
431
+ "step": 290
432
+ },
433
+ {
434
+ "epoch": 0.06,
435
+ "learning_rate": 0.00028281853281853284,
436
+ "logits/chosen": -3.066049337387085,
437
+ "logits/rejected": -2.9455220699310303,
438
+ "logps/chosen": -131.84317016601562,
439
+ "logps/rejected": -121.24568176269531,
440
+ "loss": 0.6998,
441
+ "rewards/accuracies": 0.574999988079071,
442
+ "rewards/chosen": -1.1568044424057007,
443
+ "rewards/margins": 0.3450776934623718,
444
+ "rewards/rejected": -1.5018823146820068,
445
+ "step": 300
446
+ },
447
+ {
448
+ "epoch": 0.06,
449
+ "learning_rate": 0.0002924710424710425,
450
+ "logits/chosen": -2.8079309463500977,
451
+ "logits/rejected": -2.773160457611084,
452
+ "logps/chosen": -169.48828125,
453
+ "logps/rejected": -148.60374450683594,
454
+ "loss": 2.3505,
455
+ "rewards/accuracies": 0.5375000238418579,
456
+ "rewards/chosen": -5.812082290649414,
457
+ "rewards/margins": -0.6856533288955688,
458
+ "rewards/rejected": -5.126428127288818,
459
+ "step": 310
460
+ },
461
+ {
462
+ "epoch": 0.06,
463
+ "learning_rate": 0.0003021235521235521,
464
+ "logits/chosen": -2.6579480171203613,
465
+ "logits/rejected": -2.6659698486328125,
466
+ "logps/chosen": -141.78701782226562,
467
+ "logps/rejected": -160.35110473632812,
468
+ "loss": 0.8982,
469
+ "rewards/accuracies": 0.4749999940395355,
470
+ "rewards/chosen": -3.301175594329834,
471
+ "rewards/margins": 0.01409349124878645,
472
+ "rewards/rejected": -3.3152689933776855,
473
+ "step": 320
474
+ },
475
+ {
476
+ "epoch": 0.06,
477
+ "learning_rate": 0.0003117760617760618,
478
+ "logits/chosen": -2.8775956630706787,
479
+ "logits/rejected": -2.8255507946014404,
480
+ "logps/chosen": -150.46177673339844,
481
+ "logps/rejected": -144.97073364257812,
482
+ "loss": 0.9436,
483
+ "rewards/accuracies": 0.48750001192092896,
484
+ "rewards/chosen": -3.29494047164917,
485
+ "rewards/margins": 0.0633418932557106,
486
+ "rewards/rejected": -3.3582825660705566,
487
+ "step": 330
488
+ },
489
+ {
490
+ "epoch": 0.07,
491
+ "learning_rate": 0.0003204633204633205,
492
+ "logits/chosen": -2.8166918754577637,
493
+ "logits/rejected": -2.818556308746338,
494
+ "logps/chosen": -171.34437561035156,
495
+ "logps/rejected": -172.5870361328125,
496
+ "loss": 1.0895,
497
+ "rewards/accuracies": 0.574999988079071,
498
+ "rewards/chosen": -4.722414016723633,
499
+ "rewards/margins": 0.23760518431663513,
500
+ "rewards/rejected": -4.960019111633301,
501
+ "step": 340
502
+ },
503
+ {
504
+ "epoch": 0.07,
505
+ "learning_rate": 0.00033011583011583015,
506
+ "logits/chosen": -2.9005284309387207,
507
+ "logits/rejected": -2.905046224594116,
508
+ "logps/chosen": -179.5493621826172,
509
+ "logps/rejected": -189.4880828857422,
510
+ "loss": 1.4019,
511
+ "rewards/accuracies": 0.512499988079071,
512
+ "rewards/chosen": -4.879184722900391,
513
+ "rewards/margins": 0.2637065649032593,
514
+ "rewards/rejected": -5.142890930175781,
515
+ "step": 350
516
+ },
517
+ {
518
+ "epoch": 0.07,
519
+ "learning_rate": 0.00033976833976833977,
520
+ "logits/chosen": -2.4955551624298096,
521
+ "logits/rejected": -2.5379605293273926,
522
+ "logps/chosen": -142.00270080566406,
523
+ "logps/rejected": -147.41220092773438,
524
+ "loss": 0.7586,
525
+ "rewards/accuracies": 0.625,
526
+ "rewards/chosen": -3.0069820880889893,
527
+ "rewards/margins": 0.49537187814712524,
528
+ "rewards/rejected": -3.502354383468628,
529
+ "step": 360
530
+ },
531
+ {
532
+ "epoch": 0.07,
533
+ "learning_rate": 0.00034942084942084944,
534
+ "logits/chosen": -2.451601266860962,
535
+ "logits/rejected": -2.4406824111938477,
536
+ "logps/chosen": -177.74746704101562,
537
+ "logps/rejected": -181.7180633544922,
538
+ "loss": 1.6042,
539
+ "rewards/accuracies": 0.512499988079071,
540
+ "rewards/chosen": -6.1398024559021,
541
+ "rewards/margins": -0.07387089729309082,
542
+ "rewards/rejected": -6.065931797027588,
543
+ "step": 370
544
+ },
545
+ {
546
+ "epoch": 0.07,
547
+ "learning_rate": 0.0003590733590733591,
548
+ "logits/chosen": -2.4471051692962646,
549
+ "logits/rejected": -2.409393548965454,
550
+ "logps/chosen": -182.06051635742188,
551
+ "logps/rejected": -163.97035217285156,
552
+ "loss": 2.4314,
553
+ "rewards/accuracies": 0.4375,
554
+ "rewards/chosen": -6.733994483947754,
555
+ "rewards/margins": -1.0469824075698853,
556
+ "rewards/rejected": -5.687012672424316,
557
+ "step": 380
558
+ },
559
+ {
560
+ "epoch": 0.08,
561
+ "learning_rate": 0.0003687258687258687,
562
+ "logits/chosen": -2.8171439170837402,
563
+ "logits/rejected": -2.7251698970794678,
564
+ "logps/chosen": -160.04652404785156,
565
+ "logps/rejected": -142.2588348388672,
566
+ "loss": 1.0093,
567
+ "rewards/accuracies": 0.574999988079071,
568
+ "rewards/chosen": -3.551713466644287,
569
+ "rewards/margins": 0.07490300387144089,
570
+ "rewards/rejected": -3.6266167163848877,
571
+ "step": 390
572
+ },
573
+ {
574
+ "epoch": 0.08,
575
+ "learning_rate": 0.0003783783783783784,
576
+ "logits/chosen": -2.8222527503967285,
577
+ "logits/rejected": -2.8754923343658447,
578
+ "logps/chosen": -138.70736694335938,
579
+ "logps/rejected": -142.30128479003906,
580
+ "loss": 0.945,
581
+ "rewards/accuracies": 0.512499988079071,
582
+ "rewards/chosen": -3.170539140701294,
583
+ "rewards/margins": 0.011356920003890991,
584
+ "rewards/rejected": -3.1818957328796387,
585
+ "step": 400
586
+ },
587
+ {
588
+ "epoch": 0.08,
589
+ "eval_logits/chosen": -2.9538896083831787,
590
+ "eval_logits/rejected": -2.8971762657165527,
591
+ "eval_logps/chosen": -152.2897491455078,
592
+ "eval_logps/rejected": -150.13941955566406,
593
+ "eval_loss": 1.0823436975479126,
594
+ "eval_rewards/accuracies": 0.5100105404853821,
595
+ "eval_rewards/chosen": -3.548898458480835,
596
+ "eval_rewards/margins": -0.032288454473018646,
597
+ "eval_rewards/rejected": -3.5166099071502686,
598
+ "eval_runtime": 1347.2142,
599
+ "eval_samples_per_second": 0.704,
600
+ "eval_steps_per_second": 0.704,
601
+ "step": 400
602
+ },
603
+ {
604
+ "epoch": 0.08,
605
+ "learning_rate": 0.00038803088803088807,
606
+ "logits/chosen": -2.8151559829711914,
607
+ "logits/rejected": -2.8057456016540527,
608
+ "logps/chosen": -145.69772338867188,
609
+ "logps/rejected": -156.96774291992188,
610
+ "loss": 0.8857,
611
+ "rewards/accuracies": 0.574999988079071,
612
+ "rewards/chosen": -3.8467044830322266,
613
+ "rewards/margins": 0.3242764472961426,
614
+ "rewards/rejected": -4.170981407165527,
615
+ "step": 410
616
+ },
617
+ {
618
+ "epoch": 0.08,
619
+ "learning_rate": 0.0003976833976833977,
620
+ "logits/chosen": -2.7274651527404785,
621
+ "logits/rejected": -2.713927745819092,
622
+ "logps/chosen": -164.06692504882812,
623
+ "logps/rejected": -149.8723907470703,
624
+ "loss": 0.9745,
625
+ "rewards/accuracies": 0.512499988079071,
626
+ "rewards/chosen": -3.5636115074157715,
627
+ "rewards/margins": 0.0345739908516407,
628
+ "rewards/rejected": -3.5981857776641846,
629
+ "step": 420
630
+ },
631
+ {
632
+ "epoch": 0.08,
633
+ "learning_rate": 0.00040733590733590735,
634
+ "logits/chosen": -2.8782758712768555,
635
+ "logits/rejected": -2.8031697273254395,
636
+ "logps/chosen": -165.91473388671875,
637
+ "logps/rejected": -147.03627014160156,
638
+ "loss": 0.8281,
639
+ "rewards/accuracies": 0.625,
640
+ "rewards/chosen": -4.2417216300964355,
641
+ "rewards/margins": 0.32142549753189087,
642
+ "rewards/rejected": -4.563147068023682,
643
+ "step": 430
644
+ },
645
+ {
646
+ "epoch": 0.09,
647
+ "learning_rate": 0.000416988416988417,
648
+ "logits/chosen": -2.623711585998535,
649
+ "logits/rejected": -2.622528314590454,
650
+ "logps/chosen": -149.8426513671875,
651
+ "logps/rejected": -159.93692016601562,
652
+ "loss": 0.9961,
653
+ "rewards/accuracies": 0.5,
654
+ "rewards/chosen": -4.433084487915039,
655
+ "rewards/margins": 0.28495556116104126,
656
+ "rewards/rejected": -4.7180399894714355,
657
+ "step": 440
658
+ },
659
+ {
660
+ "epoch": 0.09,
661
+ "learning_rate": 0.00042664092664092664,
662
+ "logits/chosen": -2.580371618270874,
663
+ "logits/rejected": -2.5741231441497803,
664
+ "logps/chosen": -160.84347534179688,
665
+ "logps/rejected": -141.23475646972656,
666
+ "loss": 1.2914,
667
+ "rewards/accuracies": 0.4625000059604645,
668
+ "rewards/chosen": -4.076364994049072,
669
+ "rewards/margins": -0.23183032870292664,
670
+ "rewards/rejected": -3.8445351123809814,
671
+ "step": 450
672
+ },
673
+ {
674
+ "epoch": 0.09,
675
+ "learning_rate": 0.0004362934362934363,
676
+ "logits/chosen": -2.889563798904419,
677
+ "logits/rejected": -2.8342082500457764,
678
+ "logps/chosen": -186.77017211914062,
679
+ "logps/rejected": -168.42330932617188,
680
+ "loss": 1.1317,
681
+ "rewards/accuracies": 0.44999998807907104,
682
+ "rewards/chosen": -4.674792289733887,
683
+ "rewards/margins": -0.265504390001297,
684
+ "rewards/rejected": -4.409287929534912,
685
+ "step": 460
686
+ },
687
+ {
688
+ "epoch": 0.09,
689
+ "learning_rate": 0.000445945945945946,
690
+ "logits/chosen": -2.3731606006622314,
691
+ "logits/rejected": -2.344404697418213,
692
+ "logps/chosen": -172.8909454345703,
693
+ "logps/rejected": -175.9696502685547,
694
+ "loss": 0.9674,
695
+ "rewards/accuracies": 0.574999988079071,
696
+ "rewards/chosen": -5.584943771362305,
697
+ "rewards/margins": 0.14772634208202362,
698
+ "rewards/rejected": -5.732670783996582,
699
+ "step": 470
700
+ },
701
+ {
702
+ "epoch": 0.09,
703
+ "learning_rate": 0.0004555984555984556,
704
+ "logits/chosen": -2.3436319828033447,
705
+ "logits/rejected": -2.301845073699951,
706
+ "logps/chosen": -173.07313537597656,
707
+ "logps/rejected": -169.7339630126953,
708
+ "loss": 1.2097,
709
+ "rewards/accuracies": 0.4749999940395355,
710
+ "rewards/chosen": -5.504385471343994,
711
+ "rewards/margins": 0.23244301974773407,
712
+ "rewards/rejected": -5.736828804016113,
713
+ "step": 480
714
+ },
715
+ {
716
+ "epoch": 0.09,
717
+ "learning_rate": 0.00046525096525096526,
718
+ "logits/chosen": -2.6778111457824707,
719
+ "logits/rejected": -2.5807526111602783,
720
+ "logps/chosen": -166.180419921875,
721
+ "logps/rejected": -168.2677001953125,
722
+ "loss": 1.2799,
723
+ "rewards/accuracies": 0.512499988079071,
724
+ "rewards/chosen": -4.63643217086792,
725
+ "rewards/margins": 0.5320998430252075,
726
+ "rewards/rejected": -5.168532371520996,
727
+ "step": 490
728
+ },
729
+ {
730
+ "epoch": 0.1,
731
+ "learning_rate": 0.00047490347490347493,
732
+ "logits/chosen": -3.083743095397949,
733
+ "logits/rejected": -3.080765962600708,
734
+ "logps/chosen": -193.96841430664062,
735
+ "logps/rejected": -182.3042755126953,
736
+ "loss": 1.6246,
737
+ "rewards/accuracies": 0.5,
738
+ "rewards/chosen": -6.152979850769043,
739
+ "rewards/margins": -0.20669928193092346,
740
+ "rewards/rejected": -5.946280479431152,
741
+ "step": 500
742
+ }
743
+ ],
744
+ "logging_steps": 10,
745
+ "max_steps": 5173,
746
+ "num_train_epochs": 1,
747
+ "save_steps": 100,
748
+ "total_flos": 0.0,
749
+ "trial_name": null,
750
+ "trial_params": null
751
+ }
training_args.bin ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b2d24f52f3de130e83c614d71d9b7667bb85da1de04a98bd39d203115dbe7af7
3
+ size 4091