alexredna committed (verified)
Commit c686a6b · 1 Parent(s): 1f17702

Model save

README.md ADDED
@@ -0,0 +1,92 @@
+ ---
+ library_name: peft
+ tags:
+ - trl
+ - dpo
+ - generated_from_trainer
+ base_model: alexredna/Tukan-1.1B-Chat-v0.6
+ model-index:
+ - name: Tukan-1.1B-Chat-v0.6_dpo
+ results: []
+ ---
+
+ <!-- This model card has been generated automatically according to the information the Trainer had access to. You
+ should probably proofread and complete it, then remove this comment. -->
+
+ # Tukan-1.1B-Chat-v0.6_dpo
+
+ This model is a fine-tuned version of [alexredna/Tukan-1.1B-Chat-v0.6](https://huggingface.co/alexredna/Tukan-1.1B-Chat-v0.6) on the None dataset.
+ It achieves the following results on the evaluation set:
+ - Loss: 0.6664
+ - Rewards/chosen: -0.0698
+ - Rewards/rejected: -0.1297
+ - Rewards/accuracies: 0.6667
+ - Rewards/margins: 0.0600
+ - Logps/rejected: -364.0182
+ - Logps/chosen: -408.6165
+ - Logits/rejected: -2.1219
+ - Logits/chosen: -2.2568
+
+ ## Model description
+
+ More information needed
+
+ ## Intended uses & limitations
+
+ More information needed
+
+ ## Training and evaluation data
+
+ More information needed
+
+ ## Training procedure
+
+ ### Training hyperparameters
+
+ The following hyperparameters were used during training:
+ - learning_rate: 3e-07
+ - train_batch_size: 5
+ - eval_batch_size: 3
+ - seed: 42
+ - distributed_type: multi-GPU
+ - gradient_accumulation_steps: 32
+ - total_train_batch_size: 160
+ - optimizer: Adam with betas=(0.9,0.999) and epsilon=1e-08
+ - lr_scheduler_type: linear
+ - lr_scheduler_warmup_ratio: 0.1
+ - num_epochs: 3
+
+ ### Training results
+
+ | Training Loss | Epoch | Step | Validation Loss | Rewards/chosen | Rewards/rejected | Rewards/accuracies | Rewards/margins | Logps/rejected | Logps/chosen | Logits/rejected | Logits/chosen |
+ |:-------------:|:-----:|:----:|:---------------:|:--------------:|:----------------:|:------------------:|:---------------:|:--------------:|:------------:|:---------------:|:-------------:|
+ | 0.6819 | 0.99 | 61 | 0.6804 | -0.0465 | -0.0746 | 0.6318 | 0.0281 | -363.4672 | -408.3842 | -2.1356 | -2.2671 |
+ | 0.6718 | 2.0 | 123 | 0.6719 | -0.0689 | -0.1170 | 0.6269 | 0.0482 | -363.8911 | -408.6075 | -2.1242 | -2.2588 |
+ | 0.6687 | 2.97 | 183 | 0.6664 | -0.0698 | -0.1297 | 0.6667 | 0.0600 | -364.0182 | -408.6165 | -2.1219 | -2.2568 |
+
+
+ ### Framework versions
+
+ - Transformers 4.36.2
+ - Pytorch 2.1.1+cu121
+ - Datasets 2.14.6
+ - Tokenizers 0.15.0
+ ## Training procedure
+
+
+ The following `bitsandbytes` quantization config was used during training:
+ - quant_method: bitsandbytes
+ - load_in_8bit: False
+ - load_in_4bit: True
+ - llm_int8_threshold: 6.0
+ - llm_int8_skip_modules: None
+ - llm_int8_enable_fp32_cpu_offload: False
+ - llm_int8_has_fp16_weight: False
+ - bnb_4bit_quant_type: nf4
+ - bnb_4bit_use_double_quant: False
+ - bnb_4bit_compute_dtype: float16
+
+ ### Framework versions
+
+
+ - PEFT 0.6.1
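
Editor's note: the `bitsandbytes` settings listed in the card map directly onto `transformers.BitsAndBytesConfig`. The sketch below is not the author's training script; it only mirrors the reported values to show how the quantized base model could be loaded (the `device_map="auto"` choice is an assumption).

```python
import torch
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# Mirror the quantization settings reported in the card above.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_use_double_quant=False,
    bnb_4bit_compute_dtype=torch.float16,
    llm_int8_threshold=6.0,
)

base_model = AutoModelForCausalLM.from_pretrained(
    "alexredna/Tukan-1.1B-Chat-v0.6",
    quantization_config=bnb_config,
    device_map="auto",  # assumes a CUDA device is available
)
```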
adapter_config.json ADDED
@@ -0,0 +1,29 @@
+ {
+ "alpha_pattern": {},
+ "auto_mapping": null,
+ "base_model_name_or_path": "alexredna/Tukan-1.1B-Chat-v0.6",
+ "bias": "none",
+ "fan_in_fan_out": false,
+ "inference_mode": true,
+ "init_lora_weights": true,
+ "layers_pattern": null,
+ "layers_to_transform": null,
+ "lora_alpha": 64,
+ "lora_dropout": 0.05,
+ "modules_to_save": null,
+ "peft_type": "LORA",
+ "r": 64,
+ "rank_pattern": {},
+ "revision": null,
+ "target_modules": [
+ "q_proj",
+ "k_proj",
+ "lm_head",
+ "o_proj",
+ "gate_proj",
+ "down_proj",
+ "v_proj",
+ "up_proj"
+ ],
+ "task_type": "CAUSAL_LM"
+ }
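
Editor's note: `adapter_config.json` corresponds to a standard `peft.LoraConfig`. A minimal sketch of the equivalent config and of attaching the saved adapter to the quantized base model from the earlier sketch; `"path/to/this/repo"` is a placeholder for a local checkout, not a published repo id.

```python
from peft import LoraConfig, PeftModel

# Equivalent of adapter_config.json (values copied from the file above).
lora_config = LoraConfig(
    r=64,
    lora_alpha=64,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=[
        "q_proj", "k_proj", "v_proj", "o_proj",
        "gate_proj", "up_proj", "down_proj", "lm_head",
    ],
)

# Attach the trained adapter weights; `base_model` is the model loaded in the
# quantization sketch above, and the path is a placeholder for this checkout.
model = PeftModel.from_pretrained(base_model, "path/to/this/repo")
```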
adapter_model.safetensors ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac7eff79678968cd1383f8a9706887efddbada917985a7d66a0b16de4652cf2f
+ size 210608672
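
Editor's note: the weights file is stored as a Git LFS pointer, so the diff only records the blob's SHA-256 and byte size. A quick integrity check for a downloaded copy (local filename assumed):

```python
import hashlib

expected = "ac7eff79678968cd1383f8a9706887efddbada917985a7d66a0b16de4652cf2f"

h = hashlib.sha256()
# Assumes the real safetensors blob was fetched via LFS into the working directory.
with open("adapter_model.safetensors", "rb") as f:
    for chunk in iter(lambda: f.read(1 << 20), b""):
        h.update(chunk)

assert h.hexdigest() == expected, "checksum mismatch against the LFS pointer"
```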
all_results.json ADDED
@@ -0,0 +1,21 @@
+ {
+ "epoch": 2.97,
+ "eval_logits/chosen": -2.256798028945923,
+ "eval_logits/rejected": -2.121875524520874,
+ "eval_logps/chosen": -408.6165466308594,
+ "eval_logps/rejected": -364.0181884765625,
+ "eval_loss": 0.6663674712181091,
+ "eval_rewards/accuracies": 0.6666666865348816,
+ "eval_rewards/chosen": -0.06976744532585144,
+ "eval_rewards/margins": 0.05996997281908989,
+ "eval_rewards/rejected": -0.12973742187023163,
+ "eval_runtime": 39.1746,
+ "eval_samples": 201,
+ "eval_samples_per_second": 5.131,
+ "eval_steps_per_second": 1.71,
+ "train_loss": 0.6770667407682033,
+ "train_runtime": 12894.8655,
+ "train_samples": 9845,
+ "train_samples_per_second": 2.29,
+ "train_steps_per_second": 0.014
+ }
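
Editor's note: a quick sanity check on the DPO metrics above — `eval_rewards/margins` should equal `eval_rewards/chosen - eval_rewards/rejected`, up to float32 rounding in the logged values:

```python
chosen = -0.06976744532585144
rejected = -0.12973742187023163
margin = 0.05996997281908989  # value reported above

# chosen - rejected = 0.059969976..., matching the logged margin to ~4e-9.
assert abs((chosen - rejected) - margin) < 1e-6
```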
eval_results.json ADDED
@@ -0,0 +1,16 @@
+ {
+ "epoch": 2.97,
+ "eval_logits/chosen": -2.256798028945923,
+ "eval_logits/rejected": -2.121875524520874,
+ "eval_logps/chosen": -408.6165466308594,
+ "eval_logps/rejected": -364.0181884765625,
+ "eval_loss": 0.6663674712181091,
+ "eval_rewards/accuracies": 0.6666666865348816,
+ "eval_rewards/chosen": -0.06976744532585144,
+ "eval_rewards/margins": 0.05996997281908989,
+ "eval_rewards/rejected": -0.12973742187023163,
+ "eval_runtime": 39.1746,
+ "eval_samples": 201,
+ "eval_samples_per_second": 5.131,
+ "eval_steps_per_second": 1.71
+ }
runs/Jan16_16-12-01_b00663e406e8/events.out.tfevents.1705421591.b00663e406e8.761.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bf628bddf30f09ceee5b9cd3c5285e1bce1a84b39b03c238a1adc87f99abd9b1
+ size 5616
runs/Jan16_16-15-01_b00663e406e8/events.out.tfevents.1705421708.b00663e406e8.1025.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5d66c79026e42a3a7ce0d99f35acfa712939abf9f9313d43043cca7291e42277
+ size 4994
runs/Jan16_16-15-55_b00663e406e8/events.out.tfevents.1705421760.b00663e406e8.1127.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:682f7df8232dec135263b58fcae453e8c84456001b89574ade19c16f00c47764
+ size 5617
runs/Jan16_16-20-42_b00663e406e8/events.out.tfevents.1705422048.b00663e406e8.1277.0 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:faf026aeac0d86a0949e222312ed11c0d29b4c07da39dcaf1a9f9dd8dfa6f002
+ size 19445
runs/Jan16_16-20-42_b00663e406e8/events.out.tfevents.1705434982.b00663e406e8.1277.1 ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:1c23866f489067034e29508e7bc9c521794c8bcf49cb304899890b6d8a72fff3
+ size 828
special_tokens_map.json ADDED
@@ -0,0 +1,30 @@
+ {
+ "bos_token": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "eos_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "pad_token": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ },
+ "unk_token": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false
+ }
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
@@ -0,0 +1,43 @@
+ {
+ "add_bos_token": true,
+ "add_eos_token": false,
+ "added_tokens_decoder": {
+ "0": {
+ "content": "<unk>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "1": {
+ "content": "<s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ },
+ "2": {
+ "content": "</s>",
+ "lstrip": false,
+ "normalized": false,
+ "rstrip": false,
+ "single_word": false,
+ "special": true
+ }
+ },
+ "bos_token": "<s>",
+ "chat_template": "{% for message in messages %}\n{% if message['role'] == 'user' %}\n{{ '<|user|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'system' %}\n{{ '<|system|>\n' + message['content'] + eos_token }}\n{% elif message['role'] == 'assistant' %}\n{{ '<|assistant|>\n' + message['content'] + eos_token }}\n{% endif %}\n{% if loop.last and add_generation_prompt %}\n{{ '<|assistant|>' }}\n{% endif %}\n{% endfor %}",
+ "clean_up_tokenization_spaces": false,
+ "eos_token": "</s>",
+ "legacy": false,
+ "model_max_length": 2048,
+ "pad_token": "</s>",
+ "padding_side": "right",
+ "sp_model_kwargs": {},
+ "spaces_between_special_tokens": false,
+ "tokenizer_class": "LlamaTokenizer",
+ "unk_token": "<unk>",
+ "use_default_system_prompt": false
+ }
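
Editor's note: the `chat_template` above is the Zephyr-style `<|system|>` / `<|user|>` / `<|assistant|>` format. A sketch of rendering a conversation with the standard `apply_chat_template` API; the path is a placeholder for a local checkout containing this `tokenizer_config.json`.

```python
from transformers import AutoTokenizer

# Placeholder: point at a local checkout of this repo (or the base model, assuming
# it ships the same chat template).
tokenizer = AutoTokenizer.from_pretrained("path/to/this/repo")

messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]

# Renders "<|system|>\n...</s>\n<|user|>\n...</s>\n<|assistant|>"-style text,
# with the trailing assistant tag added by add_generation_prompt=True.
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
```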
train_results.json ADDED
@@ -0,0 +1,8 @@
+ {
+ "epoch": 2.97,
+ "train_loss": 0.6770667407682033,
+ "train_runtime": 12894.8655,
+ "train_samples": 9845,
+ "train_samples_per_second": 2.29,
+ "train_steps_per_second": 0.014
+ }
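
Editor's note: the reported throughput is consistent with simple arithmetic over the other fields (9845 samples across the 3 scheduled epochs over the 12894.9 s runtime, and 183 optimizer steps over the same runtime); this is a back-of-the-envelope check, not a statement of the Trainer's internal formula.

```python
train_samples = 9845
num_train_epochs = 3
train_runtime = 12894.8655
max_steps = 183

samples_per_sec = train_samples * num_train_epochs / train_runtime  # ~2.29
steps_per_sec = max_steps / train_runtime                           # ~0.014

print(round(samples_per_sec, 2), round(steps_per_sec, 3))  # 2.29 0.014
```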
trainer_state.json ADDED
@@ -0,0 +1,344 @@
+ {
+ "best_metric": null,
+ "best_model_checkpoint": null,
+ "epoch": 2.9740985271711526,
+ "eval_steps": 100,
+ "global_step": 183,
+ "is_hyper_param_search": false,
+ "is_local_process_zero": true,
+ "is_world_process_zero": true,
+ "log_history": [
+ {
+ "epoch": 0.02,
+ "learning_rate": 1.5789473684210525e-08,
+ "logits/chosen": -2.2486207485198975,
+ "logits/rejected": -2.1592307090759277,
+ "logps/chosen": -384.7846984863281,
+ "logps/rejected": -317.2812805175781,
+ "loss": 0.6931,
+ "rewards/accuracies": 0.0,
+ "rewards/chosen": 0.0,
+ "rewards/margins": 0.0,
+ "rewards/rejected": 0.0,
+ "step": 1
+ },
+ {
+ "epoch": 0.16,
+ "learning_rate": 1.5789473684210525e-07,
+ "logits/chosen": -2.2348005771636963,
+ "logits/rejected": -2.178800344467163,
+ "logps/chosen": -394.6236572265625,
+ "logps/rejected": -356.31573486328125,
+ "loss": 0.6933,
+ "rewards/accuracies": 0.44583335518836975,
+ "rewards/chosen": -0.001932556857354939,
+ "rewards/margins": 0.0004956678603775799,
+ "rewards/rejected": -0.002428224543109536,
+ "step": 10
+ },
+ {
+ "epoch": 0.33,
+ "learning_rate": 2.9817073170731707e-07,
+ "logits/chosen": -2.249009609222412,
+ "logits/rejected": -2.1796250343322754,
+ "logps/chosen": -389.2425842285156,
+ "logps/rejected": -345.89288330078125,
+ "loss": 0.6924,
+ "rewards/accuracies": 0.5162500143051147,
+ "rewards/chosen": -0.00907914899289608,
+ "rewards/margins": 0.0023889499716460705,
+ "rewards/rejected": -0.011468099430203438,
+ "step": 20
+ },
+ {
+ "epoch": 0.49,
+ "learning_rate": 2.798780487804878e-07,
+ "logits/chosen": -2.260974645614624,
+ "logits/rejected": -2.17901873588562,
+ "logps/chosen": -385.93841552734375,
+ "logps/rejected": -346.94970703125,
+ "loss": 0.6872,
+ "rewards/accuracies": 0.5725000500679016,
+ "rewards/chosen": -0.018068989738821983,
+ "rewards/margins": 0.013102496974170208,
+ "rewards/rejected": -0.031171485781669617,
+ "step": 30
+ },
+ {
+ "epoch": 0.65,
+ "learning_rate": 2.615853658536585e-07,
+ "logits/chosen": -2.24247670173645,
+ "logits/rejected": -2.1864609718322754,
+ "logps/chosen": -388.5516357421875,
+ "logps/rejected": -351.4178466796875,
+ "loss": 0.6851,
+ "rewards/accuracies": 0.5887500047683716,
+ "rewards/chosen": -0.029972827062010765,
+ "rewards/margins": 0.017685705795884132,
+ "rewards/rejected": -0.0476585291326046,
+ "step": 40
+ },
+ {
+ "epoch": 0.81,
+ "learning_rate": 2.4329268292682927e-07,
+ "logits/chosen": -2.234823703765869,
+ "logits/rejected": -2.1762919425964355,
+ "logps/chosen": -395.30572509765625,
+ "logps/rejected": -348.18743896484375,
+ "loss": 0.6845,
+ "rewards/accuracies": 0.5837500095367432,
+ "rewards/chosen": -0.038403529673814774,
+ "rewards/margins": 0.019298262894153595,
+ "rewards/rejected": -0.05770179629325867,
+ "step": 50
+ },
+ {
+ "epoch": 0.98,
+ "learning_rate": 2.25e-07,
+ "logits/chosen": -2.2393314838409424,
+ "logits/rejected": -2.1633594036102295,
+ "logps/chosen": -398.8902282714844,
+ "logps/rejected": -351.87567138671875,
+ "loss": 0.6819,
+ "rewards/accuracies": 0.6031250357627869,
+ "rewards/chosen": -0.042560458183288574,
+ "rewards/margins": 0.02526494860649109,
+ "rewards/rejected": -0.06782540678977966,
+ "step": 60
+ },
+ {
+ "epoch": 0.99,
+ "eval_logits/chosen": -2.267106294631958,
+ "eval_logits/rejected": -2.135558605194092,
+ "eval_logps/chosen": -408.3842468261719,
+ "eval_logps/rejected": -363.4671630859375,
+ "eval_loss": 0.6804352402687073,
+ "eval_rewards/accuracies": 0.6318407654762268,
+ "eval_rewards/chosen": -0.04653656482696533,
+ "eval_rewards/margins": 0.028099289163947105,
+ "eval_rewards/rejected": -0.07463585585355759,
+ "eval_runtime": 39.5616,
+ "eval_samples_per_second": 5.081,
+ "eval_steps_per_second": 1.694,
+ "step": 61
+ },
+ {
+ "epoch": 1.14,
+ "learning_rate": 2.0670731707317071e-07,
+ "logits/chosen": -2.2363815307617188,
+ "logits/rejected": -2.161525249481201,
+ "logps/chosen": -383.36749267578125,
+ "logps/rejected": -351.52716064453125,
+ "loss": 0.6761,
+ "rewards/accuracies": 0.6212500333786011,
+ "rewards/chosen": -0.04697619006037712,
+ "rewards/margins": 0.03767210990190506,
+ "rewards/rejected": -0.08464829623699188,
+ "step": 70
+ },
+ {
+ "epoch": 1.3,
+ "learning_rate": 1.8841463414634146e-07,
+ "logits/chosen": -2.2267136573791504,
+ "logits/rejected": -2.1534907817840576,
+ "logps/chosen": -390.09552001953125,
+ "logps/rejected": -340.2105407714844,
+ "loss": 0.6743,
+ "rewards/accuracies": 0.6318750381469727,
+ "rewards/chosen": -0.05278144031763077,
+ "rewards/margins": 0.04189059138298035,
+ "rewards/rejected": -0.09467203170061111,
+ "step": 80
+ },
+ {
+ "epoch": 1.46,
+ "learning_rate": 1.7012195121951216e-07,
+ "logits/chosen": -2.235853910446167,
+ "logits/rejected": -2.1650447845458984,
+ "logps/chosen": -383.0482482910156,
+ "logps/rejected": -348.92327880859375,
+ "loss": 0.6747,
+ "rewards/accuracies": 0.6206250190734863,
+ "rewards/chosen": -0.06036309152841568,
+ "rewards/margins": 0.0413818284869194,
+ "rewards/rejected": -0.10174493491649628,
+ "step": 90
+ },
+ {
+ "epoch": 1.63,
+ "learning_rate": 1.518292682926829e-07,
+ "logits/chosen": -2.216538667678833,
+ "logits/rejected": -2.151155471801758,
+ "logps/chosen": -393.03411865234375,
+ "logps/rejected": -351.6760559082031,
+ "loss": 0.6739,
+ "rewards/accuracies": 0.6306250095367432,
+ "rewards/chosen": -0.0625002533197403,
+ "rewards/margins": 0.04337610676884651,
+ "rewards/rejected": -0.10587634891271591,
+ "step": 100
+ },
+ {
+ "epoch": 1.79,
+ "learning_rate": 1.3353658536585366e-07,
+ "logits/chosen": -2.2359976768493652,
+ "logits/rejected": -2.1719553470611572,
+ "logps/chosen": -396.144775390625,
+ "logps/rejected": -347.8782653808594,
+ "loss": 0.676,
+ "rewards/accuracies": 0.6212500333786011,
+ "rewards/chosen": -0.06705383211374283,
+ "rewards/margins": 0.03948701545596123,
+ "rewards/rejected": -0.10654083639383316,
+ "step": 110
+ },
+ {
+ "epoch": 1.95,
+ "learning_rate": 1.1524390243902439e-07,
+ "logits/chosen": -2.2277469635009766,
+ "logits/rejected": -2.140388011932373,
+ "logps/chosen": -403.00335693359375,
+ "logps/rejected": -347.174072265625,
+ "loss": 0.6718,
+ "rewards/accuracies": 0.6387500166893005,
+ "rewards/chosen": -0.06794509291648865,
+ "rewards/margins": 0.04841512814164162,
+ "rewards/rejected": -0.11636020988225937,
+ "step": 120
+ },
+ {
+ "epoch": 2.0,
+ "eval_logits/chosen": -2.258761167526245,
+ "eval_logits/rejected": -2.1242141723632812,
+ "eval_logps/chosen": -408.6074523925781,
+ "eval_logps/rejected": -363.89111328125,
+ "eval_loss": 0.6719397306442261,
+ "eval_rewards/accuracies": 0.6268656849861145,
+ "eval_rewards/chosen": -0.06885469704866409,
+ "eval_rewards/margins": 0.04817221686244011,
+ "eval_rewards/rejected": -0.1170269101858139,
+ "eval_runtime": 39.3145,
+ "eval_samples_per_second": 5.113,
+ "eval_steps_per_second": 1.704,
+ "step": 123
+ },
+ {
+ "epoch": 2.11,
+ "learning_rate": 9.695121951219512e-08,
+ "logits/chosen": -2.206012725830078,
+ "logits/rejected": -2.150679588317871,
+ "logps/chosen": -391.74652099609375,
+ "logps/rejected": -360.0486755371094,
+ "loss": 0.6714,
+ "rewards/accuracies": 0.6306250095367432,
+ "rewards/chosen": -0.07042767852544785,
+ "rewards/margins": 0.04965236037969589,
+ "rewards/rejected": -0.12008003145456314,
+ "step": 130
+ },
+ {
+ "epoch": 2.28,
+ "learning_rate": 7.865853658536585e-08,
+ "logits/chosen": -2.221883535385132,
+ "logits/rejected": -2.152527332305908,
+ "logps/chosen": -389.9949645996094,
+ "logps/rejected": -348.03179931640625,
+ "loss": 0.6705,
+ "rewards/accuracies": 0.6293750405311584,
+ "rewards/chosen": -0.07382883131504059,
+ "rewards/margins": 0.051857102662324905,
+ "rewards/rejected": -0.1256859302520752,
+ "step": 140
+ },
+ {
+ "epoch": 2.44,
+ "learning_rate": 6.036585365853659e-08,
+ "logits/chosen": -2.2389044761657715,
+ "logits/rejected": -2.171718120574951,
+ "logps/chosen": -391.4035949707031,
+ "logps/rejected": -351.60931396484375,
+ "loss": 0.6689,
+ "rewards/accuracies": 0.6475000381469727,
+ "rewards/chosen": -0.07051853090524673,
+ "rewards/margins": 0.055325280874967575,
+ "rewards/rejected": -0.1258438229560852,
+ "step": 150
+ },
+ {
+ "epoch": 2.6,
+ "learning_rate": 4.207317073170732e-08,
+ "logits/chosen": -2.2196097373962402,
+ "logits/rejected": -2.141348361968994,
+ "logps/chosen": -396.71124267578125,
+ "logps/rejected": -345.557861328125,
+ "loss": 0.6678,
+ "rewards/accuracies": 0.6568750739097595,
+ "rewards/chosen": -0.07177285104990005,
+ "rewards/margins": 0.057598698884248734,
+ "rewards/rejected": -0.1293715536594391,
+ "step": 160
+ },
+ {
+ "epoch": 2.76,
+ "learning_rate": 2.3780487804878048e-08,
+ "logits/chosen": -2.226057767868042,
+ "logits/rejected": -2.160684108734131,
+ "logps/chosen": -401.338623046875,
+ "logps/rejected": -358.25018310546875,
+ "loss": 0.6708,
+ "rewards/accuracies": 0.6156250238418579,
+ "rewards/chosen": -0.07205704599618912,
+ "rewards/margins": 0.05227302759885788,
+ "rewards/rejected": -0.1243300810456276,
+ "step": 170
+ },
+ {
+ "epoch": 2.93,
+ "learning_rate": 5.48780487804878e-09,
+ "logits/chosen": -2.21893572807312,
+ "logits/rejected": -2.1349940299987793,
+ "logps/chosen": -383.5135498046875,
+ "logps/rejected": -338.13592529296875,
+ "loss": 0.6687,
+ "rewards/accuracies": 0.6456249952316284,
+ "rewards/chosen": -0.07878817617893219,
+ "rewards/margins": 0.05617079883813858,
+ "rewards/rejected": -0.13495896756649017,
+ "step": 180
+ },
+ {
+ "epoch": 2.97,
+ "eval_logits/chosen": -2.256798028945923,
+ "eval_logits/rejected": -2.121875524520874,
+ "eval_logps/chosen": -408.6165466308594,
+ "eval_logps/rejected": -364.0181884765625,
+ "eval_loss": 0.6663674712181091,
+ "eval_rewards/accuracies": 0.6666666865348816,
+ "eval_rewards/chosen": -0.06976744532585144,
+ "eval_rewards/margins": 0.05996997281908989,
+ "eval_rewards/rejected": -0.12973742187023163,
+ "eval_runtime": 39.4977,
+ "eval_samples_per_second": 5.089,
+ "eval_steps_per_second": 1.696,
+ "step": 183
+ },
+ {
+ "epoch": 2.97,
+ "step": 183,
+ "total_flos": 0.0,
+ "train_loss": 0.6770667407682033,
+ "train_runtime": 12894.8655,
+ "train_samples_per_second": 2.29,
+ "train_steps_per_second": 0.014
+ }
+ ],
+ "logging_steps": 10,
+ "max_steps": 183,
+ "num_input_tokens_seen": 0,
+ "num_train_epochs": 3,
+ "save_steps": 500,
+ "total_flos": 0.0,
+ "train_batch_size": 5,
+ "trial_name": null,
+ "trial_params": null
+ }
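
Editor's note: the `learning_rate` values in the log follow the card's linear schedule with warmup (peak 3e-07, warmup_ratio 0.1 of 183 steps, i.e. roughly 19 warmup steps assuming the ratio is rounded up). A small sketch that reproduces the logged values:

```python
PEAK_LR = 3e-07
MAX_STEPS = 183
WARMUP_STEPS = 19  # ceil(0.1 * 183); assumes the warmup ratio is rounded up to whole steps

def lr_at(step: int) -> float:
    """Linear warmup to PEAK_LR, then linear decay to 0 at MAX_STEPS."""
    if step < WARMUP_STEPS:
        return PEAK_LR * step / WARMUP_STEPS
    return PEAK_LR * (MAX_STEPS - step) / (MAX_STEPS - WARMUP_STEPS)

# Matches the logged values, e.g. step 1 -> 1.5789e-08, step 20 -> 2.9817e-07,
# step 100 -> 1.5183e-07, step 180 -> 5.4878e-09.
for step in (1, 10, 20, 100, 180):
    print(step, lr_at(step))
```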
training_args.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:557ca6e635d24ea7237db9d29d8fbec0a8b9e0c93bcc70aaaf903828aae78bc2
+ size 4920