Xiaodong commited on
Commit
f53095a
·
verified ·
1 Parent(s): bb16351

Delete checkpoint-160

Browse files
checkpoint-160/added_tokens.json DELETED
@@ -1,16 +0,0 @@
1
- {
2
- "<|box_end|>": 151649,
3
- "<|box_start|>": 151648,
4
- "<|endoftext|>": 151643,
5
- "<|im_end|>": 151645,
6
- "<|im_start|>": 151644,
7
- "<|image_pad|>": 151655,
8
- "<|object_ref_end|>": 151647,
9
- "<|object_ref_start|>": 151646,
10
- "<|quad_end|>": 151651,
11
- "<|quad_start|>": 151650,
12
- "<|video_pad|>": 151656,
13
- "<|vision_end|>": 151653,
14
- "<|vision_pad|>": 151654,
15
- "<|vision_start|>": 151652
16
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/chat_template.json DELETED
@@ -1,3 +0,0 @@
1
- {
2
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}"
3
- }
 
 
 
 
checkpoint-160/config.json DELETED
@@ -1,47 +0,0 @@
1
- {
2
- "_name_or_path": "/data/wangxd/models/Qwen2-VL-7B-Instruct",
3
- "architectures": [
4
- "Qwen2VLForConditionalGeneration"
5
- ],
6
- "attention_dropout": 0.0,
7
- "bos_token_id": 151643,
8
- "eos_token_id": 151645,
9
- "hidden_act": "silu",
10
- "hidden_size": 3584,
11
- "image_token_id": 151655,
12
- "initializer_range": 0.02,
13
- "intermediate_size": 18944,
14
- "max_position_embeddings": 32768,
15
- "max_window_layers": 28,
16
- "model_type": "qwen2_vl",
17
- "num_attention_heads": 28,
18
- "num_hidden_layers": 28,
19
- "num_key_value_heads": 4,
20
- "rms_norm_eps": 1e-06,
21
- "rope_scaling": {
22
- "mrope_section": [
23
- 16,
24
- 24,
25
- 24
26
- ],
27
- "rope_type": "default",
28
- "type": "default"
29
- },
30
- "rope_theta": 1000000.0,
31
- "sliding_window": 32768,
32
- "tie_word_embeddings": false,
33
- "torch_dtype": "bfloat16",
34
- "transformers_version": "4.48.3",
35
- "use_cache": false,
36
- "use_sliding_window": false,
37
- "video_token_id": 151656,
38
- "vision_config": {
39
- "in_chans": 3,
40
- "model_type": "qwen2_vl",
41
- "spatial_patch_size": 14
42
- },
43
- "vision_end_token_id": 151653,
44
- "vision_start_token_id": 151652,
45
- "vision_token_id": 151654,
46
- "vocab_size": 152064
47
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/generation_config.json DELETED
@@ -1,15 +0,0 @@
1
- {
2
- "attn_implementation": "flash_attention_2",
3
- "bos_token_id": 151643,
4
- "do_sample": true,
5
- "eos_token_id": [
6
- 151645,
7
- 151643
8
- ],
9
- "pad_token_id": 151643,
10
- "temperature": 0.01,
11
- "top_k": 1,
12
- "top_p": 0.001,
13
- "transformers_version": "4.48.3",
14
- "use_cache": false
15
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/merges.txt DELETED
The diff for this file is too large to render. See raw diff
 
checkpoint-160/model.safetensors.index.json DELETED
@@ -1,737 +0,0 @@
1
- {
2
- "metadata": {
3
- "total_size": 16582751232
4
- },
5
- "weight_map": {
6
- "lm_head.weight": "model-00004-of-00004.safetensors",
7
- "model.embed_tokens.weight": "model-00001-of-00004.safetensors",
8
- "model.layers.0.input_layernorm.weight": "model-00001-of-00004.safetensors",
9
- "model.layers.0.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
10
- "model.layers.0.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
11
- "model.layers.0.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
12
- "model.layers.0.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
13
- "model.layers.0.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
14
- "model.layers.0.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
15
- "model.layers.0.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
16
- "model.layers.0.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
17
- "model.layers.0.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
18
- "model.layers.0.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
19
- "model.layers.0.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
20
- "model.layers.1.input_layernorm.weight": "model-00001-of-00004.safetensors",
21
- "model.layers.1.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
22
- "model.layers.1.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
23
- "model.layers.1.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
24
- "model.layers.1.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
25
- "model.layers.1.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
26
- "model.layers.1.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
27
- "model.layers.1.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
28
- "model.layers.1.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
29
- "model.layers.1.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
30
- "model.layers.1.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
31
- "model.layers.1.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
32
- "model.layers.10.input_layernorm.weight": "model-00002-of-00004.safetensors",
33
- "model.layers.10.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
34
- "model.layers.10.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
35
- "model.layers.10.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
36
- "model.layers.10.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
37
- "model.layers.10.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
38
- "model.layers.10.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
39
- "model.layers.10.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
40
- "model.layers.10.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
41
- "model.layers.10.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
42
- "model.layers.10.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
43
- "model.layers.10.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
44
- "model.layers.11.input_layernorm.weight": "model-00002-of-00004.safetensors",
45
- "model.layers.11.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
46
- "model.layers.11.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
47
- "model.layers.11.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
48
- "model.layers.11.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
49
- "model.layers.11.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
50
- "model.layers.11.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
51
- "model.layers.11.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
52
- "model.layers.11.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
53
- "model.layers.11.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
54
- "model.layers.11.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
55
- "model.layers.11.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
56
- "model.layers.12.input_layernorm.weight": "model-00002-of-00004.safetensors",
57
- "model.layers.12.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
58
- "model.layers.12.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
59
- "model.layers.12.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
60
- "model.layers.12.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
61
- "model.layers.12.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
62
- "model.layers.12.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
63
- "model.layers.12.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
64
- "model.layers.12.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
65
- "model.layers.12.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
66
- "model.layers.12.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
67
- "model.layers.12.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
68
- "model.layers.13.input_layernorm.weight": "model-00002-of-00004.safetensors",
69
- "model.layers.13.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
70
- "model.layers.13.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
71
- "model.layers.13.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
72
- "model.layers.13.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
73
- "model.layers.13.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
74
- "model.layers.13.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
75
- "model.layers.13.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
76
- "model.layers.13.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
77
- "model.layers.13.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
78
- "model.layers.13.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
79
- "model.layers.13.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
80
- "model.layers.14.input_layernorm.weight": "model-00002-of-00004.safetensors",
81
- "model.layers.14.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
82
- "model.layers.14.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
83
- "model.layers.14.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
84
- "model.layers.14.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
85
- "model.layers.14.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
86
- "model.layers.14.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
87
- "model.layers.14.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
88
- "model.layers.14.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
89
- "model.layers.14.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
90
- "model.layers.14.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
91
- "model.layers.14.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
92
- "model.layers.15.input_layernorm.weight": "model-00002-of-00004.safetensors",
93
- "model.layers.15.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
94
- "model.layers.15.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
95
- "model.layers.15.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
96
- "model.layers.15.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
97
- "model.layers.15.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
98
- "model.layers.15.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
99
- "model.layers.15.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
100
- "model.layers.15.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
101
- "model.layers.15.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
102
- "model.layers.15.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
103
- "model.layers.15.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
104
- "model.layers.16.input_layernorm.weight": "model-00003-of-00004.safetensors",
105
- "model.layers.16.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
106
- "model.layers.16.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
107
- "model.layers.16.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
108
- "model.layers.16.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
109
- "model.layers.16.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
110
- "model.layers.16.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
111
- "model.layers.16.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
112
- "model.layers.16.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
113
- "model.layers.16.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
114
- "model.layers.16.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
115
- "model.layers.16.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
116
- "model.layers.17.input_layernorm.weight": "model-00003-of-00004.safetensors",
117
- "model.layers.17.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
118
- "model.layers.17.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
119
- "model.layers.17.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
120
- "model.layers.17.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
121
- "model.layers.17.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
122
- "model.layers.17.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
123
- "model.layers.17.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
124
- "model.layers.17.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
125
- "model.layers.17.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
126
- "model.layers.17.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
127
- "model.layers.17.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
128
- "model.layers.18.input_layernorm.weight": "model-00003-of-00004.safetensors",
129
- "model.layers.18.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
130
- "model.layers.18.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
131
- "model.layers.18.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
132
- "model.layers.18.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
133
- "model.layers.18.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
134
- "model.layers.18.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
135
- "model.layers.18.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
136
- "model.layers.18.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
137
- "model.layers.18.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
138
- "model.layers.18.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
139
- "model.layers.18.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
140
- "model.layers.19.input_layernorm.weight": "model-00003-of-00004.safetensors",
141
- "model.layers.19.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
142
- "model.layers.19.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
143
- "model.layers.19.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
144
- "model.layers.19.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
145
- "model.layers.19.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
146
- "model.layers.19.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
147
- "model.layers.19.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
148
- "model.layers.19.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
149
- "model.layers.19.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
150
- "model.layers.19.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
151
- "model.layers.19.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
152
- "model.layers.2.input_layernorm.weight": "model-00001-of-00004.safetensors",
153
- "model.layers.2.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
154
- "model.layers.2.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
155
- "model.layers.2.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
156
- "model.layers.2.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
157
- "model.layers.2.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
158
- "model.layers.2.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
159
- "model.layers.2.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
160
- "model.layers.2.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
161
- "model.layers.2.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
162
- "model.layers.2.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
163
- "model.layers.2.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
164
- "model.layers.20.input_layernorm.weight": "model-00003-of-00004.safetensors",
165
- "model.layers.20.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
166
- "model.layers.20.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
167
- "model.layers.20.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
168
- "model.layers.20.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
169
- "model.layers.20.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
170
- "model.layers.20.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
171
- "model.layers.20.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
172
- "model.layers.20.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
173
- "model.layers.20.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
174
- "model.layers.20.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
175
- "model.layers.20.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
176
- "model.layers.21.input_layernorm.weight": "model-00003-of-00004.safetensors",
177
- "model.layers.21.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
178
- "model.layers.21.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
179
- "model.layers.21.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
180
- "model.layers.21.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
181
- "model.layers.21.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
182
- "model.layers.21.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
183
- "model.layers.21.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
184
- "model.layers.21.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
185
- "model.layers.21.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
186
- "model.layers.21.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
187
- "model.layers.21.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
188
- "model.layers.22.input_layernorm.weight": "model-00003-of-00004.safetensors",
189
- "model.layers.22.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
190
- "model.layers.22.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
191
- "model.layers.22.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
192
- "model.layers.22.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
193
- "model.layers.22.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
194
- "model.layers.22.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
195
- "model.layers.22.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
196
- "model.layers.22.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
197
- "model.layers.22.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
198
- "model.layers.22.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
199
- "model.layers.22.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
200
- "model.layers.23.input_layernorm.weight": "model-00003-of-00004.safetensors",
201
- "model.layers.23.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
202
- "model.layers.23.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
203
- "model.layers.23.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
204
- "model.layers.23.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
205
- "model.layers.23.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
206
- "model.layers.23.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
207
- "model.layers.23.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
208
- "model.layers.23.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
209
- "model.layers.23.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
210
- "model.layers.23.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
211
- "model.layers.23.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
212
- "model.layers.24.input_layernorm.weight": "model-00003-of-00004.safetensors",
213
- "model.layers.24.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
214
- "model.layers.24.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
215
- "model.layers.24.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
216
- "model.layers.24.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
217
- "model.layers.24.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
218
- "model.layers.24.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
219
- "model.layers.24.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
220
- "model.layers.24.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
221
- "model.layers.24.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
222
- "model.layers.24.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
223
- "model.layers.24.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
224
- "model.layers.25.input_layernorm.weight": "model-00003-of-00004.safetensors",
225
- "model.layers.25.mlp.down_proj.weight": "model-00003-of-00004.safetensors",
226
- "model.layers.25.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
227
- "model.layers.25.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
228
- "model.layers.25.post_attention_layernorm.weight": "model-00003-of-00004.safetensors",
229
- "model.layers.25.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
230
- "model.layers.25.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
231
- "model.layers.25.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
232
- "model.layers.25.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
233
- "model.layers.25.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
234
- "model.layers.25.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
235
- "model.layers.25.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
236
- "model.layers.26.input_layernorm.weight": "model-00004-of-00004.safetensors",
237
- "model.layers.26.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
238
- "model.layers.26.mlp.gate_proj.weight": "model-00003-of-00004.safetensors",
239
- "model.layers.26.mlp.up_proj.weight": "model-00003-of-00004.safetensors",
240
- "model.layers.26.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
241
- "model.layers.26.self_attn.k_proj.bias": "model-00003-of-00004.safetensors",
242
- "model.layers.26.self_attn.k_proj.weight": "model-00003-of-00004.safetensors",
243
- "model.layers.26.self_attn.o_proj.weight": "model-00003-of-00004.safetensors",
244
- "model.layers.26.self_attn.q_proj.bias": "model-00003-of-00004.safetensors",
245
- "model.layers.26.self_attn.q_proj.weight": "model-00003-of-00004.safetensors",
246
- "model.layers.26.self_attn.v_proj.bias": "model-00003-of-00004.safetensors",
247
- "model.layers.26.self_attn.v_proj.weight": "model-00003-of-00004.safetensors",
248
- "model.layers.27.input_layernorm.weight": "model-00004-of-00004.safetensors",
249
- "model.layers.27.mlp.down_proj.weight": "model-00004-of-00004.safetensors",
250
- "model.layers.27.mlp.gate_proj.weight": "model-00004-of-00004.safetensors",
251
- "model.layers.27.mlp.up_proj.weight": "model-00004-of-00004.safetensors",
252
- "model.layers.27.post_attention_layernorm.weight": "model-00004-of-00004.safetensors",
253
- "model.layers.27.self_attn.k_proj.bias": "model-00004-of-00004.safetensors",
254
- "model.layers.27.self_attn.k_proj.weight": "model-00004-of-00004.safetensors",
255
- "model.layers.27.self_attn.o_proj.weight": "model-00004-of-00004.safetensors",
256
- "model.layers.27.self_attn.q_proj.bias": "model-00004-of-00004.safetensors",
257
- "model.layers.27.self_attn.q_proj.weight": "model-00004-of-00004.safetensors",
258
- "model.layers.27.self_attn.v_proj.bias": "model-00004-of-00004.safetensors",
259
- "model.layers.27.self_attn.v_proj.weight": "model-00004-of-00004.safetensors",
260
- "model.layers.3.input_layernorm.weight": "model-00001-of-00004.safetensors",
261
- "model.layers.3.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
262
- "model.layers.3.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
263
- "model.layers.3.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
264
- "model.layers.3.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
265
- "model.layers.3.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
266
- "model.layers.3.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
267
- "model.layers.3.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
268
- "model.layers.3.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
269
- "model.layers.3.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
270
- "model.layers.3.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
271
- "model.layers.3.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
272
- "model.layers.4.input_layernorm.weight": "model-00001-of-00004.safetensors",
273
- "model.layers.4.mlp.down_proj.weight": "model-00001-of-00004.safetensors",
274
- "model.layers.4.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
275
- "model.layers.4.mlp.up_proj.weight": "model-00001-of-00004.safetensors",
276
- "model.layers.4.post_attention_layernorm.weight": "model-00001-of-00004.safetensors",
277
- "model.layers.4.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
278
- "model.layers.4.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
279
- "model.layers.4.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
280
- "model.layers.4.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
281
- "model.layers.4.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
282
- "model.layers.4.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
283
- "model.layers.4.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
284
- "model.layers.5.input_layernorm.weight": "model-00002-of-00004.safetensors",
285
- "model.layers.5.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
286
- "model.layers.5.mlp.gate_proj.weight": "model-00001-of-00004.safetensors",
287
- "model.layers.5.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
288
- "model.layers.5.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
289
- "model.layers.5.self_attn.k_proj.bias": "model-00001-of-00004.safetensors",
290
- "model.layers.5.self_attn.k_proj.weight": "model-00001-of-00004.safetensors",
291
- "model.layers.5.self_attn.o_proj.weight": "model-00001-of-00004.safetensors",
292
- "model.layers.5.self_attn.q_proj.bias": "model-00001-of-00004.safetensors",
293
- "model.layers.5.self_attn.q_proj.weight": "model-00001-of-00004.safetensors",
294
- "model.layers.5.self_attn.v_proj.bias": "model-00001-of-00004.safetensors",
295
- "model.layers.5.self_attn.v_proj.weight": "model-00001-of-00004.safetensors",
296
- "model.layers.6.input_layernorm.weight": "model-00002-of-00004.safetensors",
297
- "model.layers.6.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
298
- "model.layers.6.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
299
- "model.layers.6.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
300
- "model.layers.6.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
301
- "model.layers.6.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
302
- "model.layers.6.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
303
- "model.layers.6.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
304
- "model.layers.6.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
305
- "model.layers.6.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
306
- "model.layers.6.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
307
- "model.layers.6.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
308
- "model.layers.7.input_layernorm.weight": "model-00002-of-00004.safetensors",
309
- "model.layers.7.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
310
- "model.layers.7.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
311
- "model.layers.7.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
312
- "model.layers.7.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
313
- "model.layers.7.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
314
- "model.layers.7.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
315
- "model.layers.7.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
316
- "model.layers.7.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
317
- "model.layers.7.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
318
- "model.layers.7.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
319
- "model.layers.7.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
320
- "model.layers.8.input_layernorm.weight": "model-00002-of-00004.safetensors",
321
- "model.layers.8.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
322
- "model.layers.8.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
323
- "model.layers.8.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
324
- "model.layers.8.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
325
- "model.layers.8.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
326
- "model.layers.8.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
327
- "model.layers.8.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
328
- "model.layers.8.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
329
- "model.layers.8.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
330
- "model.layers.8.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
331
- "model.layers.8.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
332
- "model.layers.9.input_layernorm.weight": "model-00002-of-00004.safetensors",
333
- "model.layers.9.mlp.down_proj.weight": "model-00002-of-00004.safetensors",
334
- "model.layers.9.mlp.gate_proj.weight": "model-00002-of-00004.safetensors",
335
- "model.layers.9.mlp.up_proj.weight": "model-00002-of-00004.safetensors",
336
- "model.layers.9.post_attention_layernorm.weight": "model-00002-of-00004.safetensors",
337
- "model.layers.9.self_attn.k_proj.bias": "model-00002-of-00004.safetensors",
338
- "model.layers.9.self_attn.k_proj.weight": "model-00002-of-00004.safetensors",
339
- "model.layers.9.self_attn.o_proj.weight": "model-00002-of-00004.safetensors",
340
- "model.layers.9.self_attn.q_proj.bias": "model-00002-of-00004.safetensors",
341
- "model.layers.9.self_attn.q_proj.weight": "model-00002-of-00004.safetensors",
342
- "model.layers.9.self_attn.v_proj.bias": "model-00002-of-00004.safetensors",
343
- "model.layers.9.self_attn.v_proj.weight": "model-00002-of-00004.safetensors",
344
- "model.norm.weight": "model-00004-of-00004.safetensors",
345
- "visual.blocks.0.attn.proj.bias": "model-00001-of-00004.safetensors",
346
- "visual.blocks.0.attn.proj.weight": "model-00001-of-00004.safetensors",
347
- "visual.blocks.0.attn.qkv.bias": "model-00001-of-00004.safetensors",
348
- "visual.blocks.0.attn.qkv.weight": "model-00001-of-00004.safetensors",
349
- "visual.blocks.0.mlp.fc1.bias": "model-00001-of-00004.safetensors",
350
- "visual.blocks.0.mlp.fc1.weight": "model-00001-of-00004.safetensors",
351
- "visual.blocks.0.mlp.fc2.bias": "model-00001-of-00004.safetensors",
352
- "visual.blocks.0.mlp.fc2.weight": "model-00001-of-00004.safetensors",
353
- "visual.blocks.0.norm1.bias": "model-00001-of-00004.safetensors",
354
- "visual.blocks.0.norm1.weight": "model-00001-of-00004.safetensors",
355
- "visual.blocks.0.norm2.bias": "model-00001-of-00004.safetensors",
356
- "visual.blocks.0.norm2.weight": "model-00001-of-00004.safetensors",
357
- "visual.blocks.1.attn.proj.bias": "model-00001-of-00004.safetensors",
358
- "visual.blocks.1.attn.proj.weight": "model-00001-of-00004.safetensors",
359
- "visual.blocks.1.attn.qkv.bias": "model-00001-of-00004.safetensors",
360
- "visual.blocks.1.attn.qkv.weight": "model-00001-of-00004.safetensors",
361
- "visual.blocks.1.mlp.fc1.bias": "model-00001-of-00004.safetensors",
362
- "visual.blocks.1.mlp.fc1.weight": "model-00001-of-00004.safetensors",
363
- "visual.blocks.1.mlp.fc2.bias": "model-00001-of-00004.safetensors",
364
- "visual.blocks.1.mlp.fc2.weight": "model-00001-of-00004.safetensors",
365
- "visual.blocks.1.norm1.bias": "model-00001-of-00004.safetensors",
366
- "visual.blocks.1.norm1.weight": "model-00001-of-00004.safetensors",
367
- "visual.blocks.1.norm2.bias": "model-00001-of-00004.safetensors",
368
- "visual.blocks.1.norm2.weight": "model-00001-of-00004.safetensors",
369
- "visual.blocks.10.attn.proj.bias": "model-00001-of-00004.safetensors",
370
- "visual.blocks.10.attn.proj.weight": "model-00001-of-00004.safetensors",
371
- "visual.blocks.10.attn.qkv.bias": "model-00001-of-00004.safetensors",
372
- "visual.blocks.10.attn.qkv.weight": "model-00001-of-00004.safetensors",
373
- "visual.blocks.10.mlp.fc1.bias": "model-00001-of-00004.safetensors",
374
- "visual.blocks.10.mlp.fc1.weight": "model-00001-of-00004.safetensors",
375
- "visual.blocks.10.mlp.fc2.bias": "model-00001-of-00004.safetensors",
376
- "visual.blocks.10.mlp.fc2.weight": "model-00001-of-00004.safetensors",
377
- "visual.blocks.10.norm1.bias": "model-00001-of-00004.safetensors",
378
- "visual.blocks.10.norm1.weight": "model-00001-of-00004.safetensors",
379
- "visual.blocks.10.norm2.bias": "model-00001-of-00004.safetensors",
380
- "visual.blocks.10.norm2.weight": "model-00001-of-00004.safetensors",
381
- "visual.blocks.11.attn.proj.bias": "model-00001-of-00004.safetensors",
382
- "visual.blocks.11.attn.proj.weight": "model-00001-of-00004.safetensors",
383
- "visual.blocks.11.attn.qkv.bias": "model-00001-of-00004.safetensors",
384
- "visual.blocks.11.attn.qkv.weight": "model-00001-of-00004.safetensors",
385
- "visual.blocks.11.mlp.fc1.bias": "model-00001-of-00004.safetensors",
386
- "visual.blocks.11.mlp.fc1.weight": "model-00001-of-00004.safetensors",
387
- "visual.blocks.11.mlp.fc2.bias": "model-00001-of-00004.safetensors",
388
- "visual.blocks.11.mlp.fc2.weight": "model-00001-of-00004.safetensors",
389
- "visual.blocks.11.norm1.bias": "model-00001-of-00004.safetensors",
390
- "visual.blocks.11.norm1.weight": "model-00001-of-00004.safetensors",
391
- "visual.blocks.11.norm2.bias": "model-00001-of-00004.safetensors",
392
- "visual.blocks.11.norm2.weight": "model-00001-of-00004.safetensors",
393
- "visual.blocks.12.attn.proj.bias": "model-00001-of-00004.safetensors",
394
- "visual.blocks.12.attn.proj.weight": "model-00001-of-00004.safetensors",
395
- "visual.blocks.12.attn.qkv.bias": "model-00001-of-00004.safetensors",
396
- "visual.blocks.12.attn.qkv.weight": "model-00001-of-00004.safetensors",
397
- "visual.blocks.12.mlp.fc1.bias": "model-00001-of-00004.safetensors",
398
- "visual.blocks.12.mlp.fc1.weight": "model-00001-of-00004.safetensors",
399
- "visual.blocks.12.mlp.fc2.bias": "model-00001-of-00004.safetensors",
400
- "visual.blocks.12.mlp.fc2.weight": "model-00001-of-00004.safetensors",
401
- "visual.blocks.12.norm1.bias": "model-00001-of-00004.safetensors",
402
- "visual.blocks.12.norm1.weight": "model-00001-of-00004.safetensors",
403
- "visual.blocks.12.norm2.bias": "model-00001-of-00004.safetensors",
404
- "visual.blocks.12.norm2.weight": "model-00001-of-00004.safetensors",
405
- "visual.blocks.13.attn.proj.bias": "model-00001-of-00004.safetensors",
406
- "visual.blocks.13.attn.proj.weight": "model-00001-of-00004.safetensors",
407
- "visual.blocks.13.attn.qkv.bias": "model-00001-of-00004.safetensors",
408
- "visual.blocks.13.attn.qkv.weight": "model-00001-of-00004.safetensors",
409
- "visual.blocks.13.mlp.fc1.bias": "model-00001-of-00004.safetensors",
410
- "visual.blocks.13.mlp.fc1.weight": "model-00001-of-00004.safetensors",
411
- "visual.blocks.13.mlp.fc2.bias": "model-00001-of-00004.safetensors",
412
- "visual.blocks.13.mlp.fc2.weight": "model-00001-of-00004.safetensors",
413
- "visual.blocks.13.norm1.bias": "model-00001-of-00004.safetensors",
414
- "visual.blocks.13.norm1.weight": "model-00001-of-00004.safetensors",
415
- "visual.blocks.13.norm2.bias": "model-00001-of-00004.safetensors",
416
- "visual.blocks.13.norm2.weight": "model-00001-of-00004.safetensors",
417
- "visual.blocks.14.attn.proj.bias": "model-00001-of-00004.safetensors",
418
- "visual.blocks.14.attn.proj.weight": "model-00001-of-00004.safetensors",
419
- "visual.blocks.14.attn.qkv.bias": "model-00001-of-00004.safetensors",
420
- "visual.blocks.14.attn.qkv.weight": "model-00001-of-00004.safetensors",
421
- "visual.blocks.14.mlp.fc1.bias": "model-00001-of-00004.safetensors",
422
- "visual.blocks.14.mlp.fc1.weight": "model-00001-of-00004.safetensors",
423
- "visual.blocks.14.mlp.fc2.bias": "model-00001-of-00004.safetensors",
424
- "visual.blocks.14.mlp.fc2.weight": "model-00001-of-00004.safetensors",
425
- "visual.blocks.14.norm1.bias": "model-00001-of-00004.safetensors",
426
- "visual.blocks.14.norm1.weight": "model-00001-of-00004.safetensors",
427
- "visual.blocks.14.norm2.bias": "model-00001-of-00004.safetensors",
428
- "visual.blocks.14.norm2.weight": "model-00001-of-00004.safetensors",
429
- "visual.blocks.15.attn.proj.bias": "model-00001-of-00004.safetensors",
430
- "visual.blocks.15.attn.proj.weight": "model-00001-of-00004.safetensors",
431
- "visual.blocks.15.attn.qkv.bias": "model-00001-of-00004.safetensors",
432
- "visual.blocks.15.attn.qkv.weight": "model-00001-of-00004.safetensors",
433
- "visual.blocks.15.mlp.fc1.bias": "model-00001-of-00004.safetensors",
434
- "visual.blocks.15.mlp.fc1.weight": "model-00001-of-00004.safetensors",
435
- "visual.blocks.15.mlp.fc2.bias": "model-00001-of-00004.safetensors",
436
- "visual.blocks.15.mlp.fc2.weight": "model-00001-of-00004.safetensors",
437
- "visual.blocks.15.norm1.bias": "model-00001-of-00004.safetensors",
438
- "visual.blocks.15.norm1.weight": "model-00001-of-00004.safetensors",
439
- "visual.blocks.15.norm2.bias": "model-00001-of-00004.safetensors",
440
- "visual.blocks.15.norm2.weight": "model-00001-of-00004.safetensors",
441
- "visual.blocks.16.attn.proj.bias": "model-00001-of-00004.safetensors",
442
- "visual.blocks.16.attn.proj.weight": "model-00001-of-00004.safetensors",
443
- "visual.blocks.16.attn.qkv.bias": "model-00001-of-00004.safetensors",
444
- "visual.blocks.16.attn.qkv.weight": "model-00001-of-00004.safetensors",
445
- "visual.blocks.16.mlp.fc1.bias": "model-00001-of-00004.safetensors",
446
- "visual.blocks.16.mlp.fc1.weight": "model-00001-of-00004.safetensors",
447
- "visual.blocks.16.mlp.fc2.bias": "model-00001-of-00004.safetensors",
448
- "visual.blocks.16.mlp.fc2.weight": "model-00001-of-00004.safetensors",
449
- "visual.blocks.16.norm1.bias": "model-00001-of-00004.safetensors",
450
- "visual.blocks.16.norm1.weight": "model-00001-of-00004.safetensors",
451
- "visual.blocks.16.norm2.bias": "model-00001-of-00004.safetensors",
452
- "visual.blocks.16.norm2.weight": "model-00001-of-00004.safetensors",
453
- "visual.blocks.17.attn.proj.bias": "model-00001-of-00004.safetensors",
454
- "visual.blocks.17.attn.proj.weight": "model-00001-of-00004.safetensors",
455
- "visual.blocks.17.attn.qkv.bias": "model-00001-of-00004.safetensors",
456
- "visual.blocks.17.attn.qkv.weight": "model-00001-of-00004.safetensors",
457
- "visual.blocks.17.mlp.fc1.bias": "model-00001-of-00004.safetensors",
458
- "visual.blocks.17.mlp.fc1.weight": "model-00001-of-00004.safetensors",
459
- "visual.blocks.17.mlp.fc2.bias": "model-00001-of-00004.safetensors",
460
- "visual.blocks.17.mlp.fc2.weight": "model-00001-of-00004.safetensors",
461
- "visual.blocks.17.norm1.bias": "model-00001-of-00004.safetensors",
462
- "visual.blocks.17.norm1.weight": "model-00001-of-00004.safetensors",
463
- "visual.blocks.17.norm2.bias": "model-00001-of-00004.safetensors",
464
- "visual.blocks.17.norm2.weight": "model-00001-of-00004.safetensors",
465
- "visual.blocks.18.attn.proj.bias": "model-00001-of-00004.safetensors",
466
- "visual.blocks.18.attn.proj.weight": "model-00001-of-00004.safetensors",
467
- "visual.blocks.18.attn.qkv.bias": "model-00001-of-00004.safetensors",
468
- "visual.blocks.18.attn.qkv.weight": "model-00001-of-00004.safetensors",
469
- "visual.blocks.18.mlp.fc1.bias": "model-00001-of-00004.safetensors",
470
- "visual.blocks.18.mlp.fc1.weight": "model-00001-of-00004.safetensors",
471
- "visual.blocks.18.mlp.fc2.bias": "model-00001-of-00004.safetensors",
472
- "visual.blocks.18.mlp.fc2.weight": "model-00001-of-00004.safetensors",
473
- "visual.blocks.18.norm1.bias": "model-00001-of-00004.safetensors",
474
- "visual.blocks.18.norm1.weight": "model-00001-of-00004.safetensors",
475
- "visual.blocks.18.norm2.bias": "model-00001-of-00004.safetensors",
476
- "visual.blocks.18.norm2.weight": "model-00001-of-00004.safetensors",
477
- "visual.blocks.19.attn.proj.bias": "model-00001-of-00004.safetensors",
478
- "visual.blocks.19.attn.proj.weight": "model-00001-of-00004.safetensors",
479
- "visual.blocks.19.attn.qkv.bias": "model-00001-of-00004.safetensors",
480
- "visual.blocks.19.attn.qkv.weight": "model-00001-of-00004.safetensors",
481
- "visual.blocks.19.mlp.fc1.bias": "model-00001-of-00004.safetensors",
482
- "visual.blocks.19.mlp.fc1.weight": "model-00001-of-00004.safetensors",
483
- "visual.blocks.19.mlp.fc2.bias": "model-00001-of-00004.safetensors",
484
- "visual.blocks.19.mlp.fc2.weight": "model-00001-of-00004.safetensors",
485
- "visual.blocks.19.norm1.bias": "model-00001-of-00004.safetensors",
486
- "visual.blocks.19.norm1.weight": "model-00001-of-00004.safetensors",
487
- "visual.blocks.19.norm2.bias": "model-00001-of-00004.safetensors",
488
- "visual.blocks.19.norm2.weight": "model-00001-of-00004.safetensors",
489
- "visual.blocks.2.attn.proj.bias": "model-00001-of-00004.safetensors",
490
- "visual.blocks.2.attn.proj.weight": "model-00001-of-00004.safetensors",
491
- "visual.blocks.2.attn.qkv.bias": "model-00001-of-00004.safetensors",
492
- "visual.blocks.2.attn.qkv.weight": "model-00001-of-00004.safetensors",
493
- "visual.blocks.2.mlp.fc1.bias": "model-00001-of-00004.safetensors",
494
- "visual.blocks.2.mlp.fc1.weight": "model-00001-of-00004.safetensors",
495
- "visual.blocks.2.mlp.fc2.bias": "model-00001-of-00004.safetensors",
496
- "visual.blocks.2.mlp.fc2.weight": "model-00001-of-00004.safetensors",
497
- "visual.blocks.2.norm1.bias": "model-00001-of-00004.safetensors",
498
- "visual.blocks.2.norm1.weight": "model-00001-of-00004.safetensors",
499
- "visual.blocks.2.norm2.bias": "model-00001-of-00004.safetensors",
500
- "visual.blocks.2.norm2.weight": "model-00001-of-00004.safetensors",
501
- "visual.blocks.20.attn.proj.bias": "model-00001-of-00004.safetensors",
502
- "visual.blocks.20.attn.proj.weight": "model-00001-of-00004.safetensors",
503
- "visual.blocks.20.attn.qkv.bias": "model-00001-of-00004.safetensors",
504
- "visual.blocks.20.attn.qkv.weight": "model-00001-of-00004.safetensors",
505
- "visual.blocks.20.mlp.fc1.bias": "model-00001-of-00004.safetensors",
506
- "visual.blocks.20.mlp.fc1.weight": "model-00001-of-00004.safetensors",
507
- "visual.blocks.20.mlp.fc2.bias": "model-00001-of-00004.safetensors",
508
- "visual.blocks.20.mlp.fc2.weight": "model-00001-of-00004.safetensors",
509
- "visual.blocks.20.norm1.bias": "model-00001-of-00004.safetensors",
510
- "visual.blocks.20.norm1.weight": "model-00001-of-00004.safetensors",
511
- "visual.blocks.20.norm2.bias": "model-00001-of-00004.safetensors",
512
- "visual.blocks.20.norm2.weight": "model-00001-of-00004.safetensors",
513
- "visual.blocks.21.attn.proj.bias": "model-00001-of-00004.safetensors",
514
- "visual.blocks.21.attn.proj.weight": "model-00001-of-00004.safetensors",
515
- "visual.blocks.21.attn.qkv.bias": "model-00001-of-00004.safetensors",
516
- "visual.blocks.21.attn.qkv.weight": "model-00001-of-00004.safetensors",
517
- "visual.blocks.21.mlp.fc1.bias": "model-00001-of-00004.safetensors",
518
- "visual.blocks.21.mlp.fc1.weight": "model-00001-of-00004.safetensors",
519
- "visual.blocks.21.mlp.fc2.bias": "model-00001-of-00004.safetensors",
520
- "visual.blocks.21.mlp.fc2.weight": "model-00001-of-00004.safetensors",
521
- "visual.blocks.21.norm1.bias": "model-00001-of-00004.safetensors",
522
- "visual.blocks.21.norm1.weight": "model-00001-of-00004.safetensors",
523
- "visual.blocks.21.norm2.bias": "model-00001-of-00004.safetensors",
524
- "visual.blocks.21.norm2.weight": "model-00001-of-00004.safetensors",
525
- "visual.blocks.22.attn.proj.bias": "model-00001-of-00004.safetensors",
526
- "visual.blocks.22.attn.proj.weight": "model-00001-of-00004.safetensors",
527
- "visual.blocks.22.attn.qkv.bias": "model-00001-of-00004.safetensors",
528
- "visual.blocks.22.attn.qkv.weight": "model-00001-of-00004.safetensors",
529
- "visual.blocks.22.mlp.fc1.bias": "model-00001-of-00004.safetensors",
530
- "visual.blocks.22.mlp.fc1.weight": "model-00001-of-00004.safetensors",
531
- "visual.blocks.22.mlp.fc2.bias": "model-00001-of-00004.safetensors",
532
- "visual.blocks.22.mlp.fc2.weight": "model-00001-of-00004.safetensors",
533
- "visual.blocks.22.norm1.bias": "model-00001-of-00004.safetensors",
534
- "visual.blocks.22.norm1.weight": "model-00001-of-00004.safetensors",
535
- "visual.blocks.22.norm2.bias": "model-00001-of-00004.safetensors",
536
- "visual.blocks.22.norm2.weight": "model-00001-of-00004.safetensors",
537
- "visual.blocks.23.attn.proj.bias": "model-00001-of-00004.safetensors",
538
- "visual.blocks.23.attn.proj.weight": "model-00001-of-00004.safetensors",
539
- "visual.blocks.23.attn.qkv.bias": "model-00001-of-00004.safetensors",
540
- "visual.blocks.23.attn.qkv.weight": "model-00001-of-00004.safetensors",
541
- "visual.blocks.23.mlp.fc1.bias": "model-00001-of-00004.safetensors",
542
- "visual.blocks.23.mlp.fc1.weight": "model-00001-of-00004.safetensors",
543
- "visual.blocks.23.mlp.fc2.bias": "model-00001-of-00004.safetensors",
544
- "visual.blocks.23.mlp.fc2.weight": "model-00001-of-00004.safetensors",
545
- "visual.blocks.23.norm1.bias": "model-00001-of-00004.safetensors",
546
- "visual.blocks.23.norm1.weight": "model-00001-of-00004.safetensors",
547
- "visual.blocks.23.norm2.bias": "model-00001-of-00004.safetensors",
548
- "visual.blocks.23.norm2.weight": "model-00001-of-00004.safetensors",
549
- "visual.blocks.24.attn.proj.bias": "model-00001-of-00004.safetensors",
550
- "visual.blocks.24.attn.proj.weight": "model-00001-of-00004.safetensors",
551
- "visual.blocks.24.attn.qkv.bias": "model-00001-of-00004.safetensors",
552
- "visual.blocks.24.attn.qkv.weight": "model-00001-of-00004.safetensors",
553
- "visual.blocks.24.mlp.fc1.bias": "model-00001-of-00004.safetensors",
554
- "visual.blocks.24.mlp.fc1.weight": "model-00001-of-00004.safetensors",
555
- "visual.blocks.24.mlp.fc2.bias": "model-00001-of-00004.safetensors",
556
- "visual.blocks.24.mlp.fc2.weight": "model-00001-of-00004.safetensors",
557
- "visual.blocks.24.norm1.bias": "model-00001-of-00004.safetensors",
558
- "visual.blocks.24.norm1.weight": "model-00001-of-00004.safetensors",
559
- "visual.blocks.24.norm2.bias": "model-00001-of-00004.safetensors",
560
- "visual.blocks.24.norm2.weight": "model-00001-of-00004.safetensors",
561
- "visual.blocks.25.attn.proj.bias": "model-00001-of-00004.safetensors",
562
- "visual.blocks.25.attn.proj.weight": "model-00001-of-00004.safetensors",
563
- "visual.blocks.25.attn.qkv.bias": "model-00001-of-00004.safetensors",
564
- "visual.blocks.25.attn.qkv.weight": "model-00001-of-00004.safetensors",
565
- "visual.blocks.25.mlp.fc1.bias": "model-00001-of-00004.safetensors",
566
- "visual.blocks.25.mlp.fc1.weight": "model-00001-of-00004.safetensors",
567
- "visual.blocks.25.mlp.fc2.bias": "model-00001-of-00004.safetensors",
568
- "visual.blocks.25.mlp.fc2.weight": "model-00001-of-00004.safetensors",
569
- "visual.blocks.25.norm1.bias": "model-00001-of-00004.safetensors",
570
- "visual.blocks.25.norm1.weight": "model-00001-of-00004.safetensors",
571
- "visual.blocks.25.norm2.bias": "model-00001-of-00004.safetensors",
572
- "visual.blocks.25.norm2.weight": "model-00001-of-00004.safetensors",
573
- "visual.blocks.26.attn.proj.bias": "model-00001-of-00004.safetensors",
574
- "visual.blocks.26.attn.proj.weight": "model-00001-of-00004.safetensors",
575
- "visual.blocks.26.attn.qkv.bias": "model-00001-of-00004.safetensors",
576
- "visual.blocks.26.attn.qkv.weight": "model-00001-of-00004.safetensors",
577
- "visual.blocks.26.mlp.fc1.bias": "model-00001-of-00004.safetensors",
578
- "visual.blocks.26.mlp.fc1.weight": "model-00001-of-00004.safetensors",
579
- "visual.blocks.26.mlp.fc2.bias": "model-00001-of-00004.safetensors",
580
- "visual.blocks.26.mlp.fc2.weight": "model-00001-of-00004.safetensors",
581
- "visual.blocks.26.norm1.bias": "model-00001-of-00004.safetensors",
582
- "visual.blocks.26.norm1.weight": "model-00001-of-00004.safetensors",
583
- "visual.blocks.26.norm2.bias": "model-00001-of-00004.safetensors",
584
- "visual.blocks.26.norm2.weight": "model-00001-of-00004.safetensors",
585
- "visual.blocks.27.attn.proj.bias": "model-00001-of-00004.safetensors",
586
- "visual.blocks.27.attn.proj.weight": "model-00001-of-00004.safetensors",
587
- "visual.blocks.27.attn.qkv.bias": "model-00001-of-00004.safetensors",
588
- "visual.blocks.27.attn.qkv.weight": "model-00001-of-00004.safetensors",
589
- "visual.blocks.27.mlp.fc1.bias": "model-00001-of-00004.safetensors",
590
- "visual.blocks.27.mlp.fc1.weight": "model-00001-of-00004.safetensors",
591
- "visual.blocks.27.mlp.fc2.bias": "model-00001-of-00004.safetensors",
592
- "visual.blocks.27.mlp.fc2.weight": "model-00001-of-00004.safetensors",
593
- "visual.blocks.27.norm1.bias": "model-00001-of-00004.safetensors",
594
- "visual.blocks.27.norm1.weight": "model-00001-of-00004.safetensors",
595
- "visual.blocks.27.norm2.bias": "model-00001-of-00004.safetensors",
596
- "visual.blocks.27.norm2.weight": "model-00001-of-00004.safetensors",
597
- "visual.blocks.28.attn.proj.bias": "model-00001-of-00004.safetensors",
598
- "visual.blocks.28.attn.proj.weight": "model-00001-of-00004.safetensors",
599
- "visual.blocks.28.attn.qkv.bias": "model-00001-of-00004.safetensors",
600
- "visual.blocks.28.attn.qkv.weight": "model-00001-of-00004.safetensors",
601
- "visual.blocks.28.mlp.fc1.bias": "model-00001-of-00004.safetensors",
602
- "visual.blocks.28.mlp.fc1.weight": "model-00001-of-00004.safetensors",
603
- "visual.blocks.28.mlp.fc2.bias": "model-00001-of-00004.safetensors",
604
- "visual.blocks.28.mlp.fc2.weight": "model-00001-of-00004.safetensors",
605
- "visual.blocks.28.norm1.bias": "model-00001-of-00004.safetensors",
606
- "visual.blocks.28.norm1.weight": "model-00001-of-00004.safetensors",
607
- "visual.blocks.28.norm2.bias": "model-00001-of-00004.safetensors",
608
- "visual.blocks.28.norm2.weight": "model-00001-of-00004.safetensors",
609
- "visual.blocks.29.attn.proj.bias": "model-00001-of-00004.safetensors",
610
- "visual.blocks.29.attn.proj.weight": "model-00001-of-00004.safetensors",
611
- "visual.blocks.29.attn.qkv.bias": "model-00001-of-00004.safetensors",
612
- "visual.blocks.29.attn.qkv.weight": "model-00001-of-00004.safetensors",
613
- "visual.blocks.29.mlp.fc1.bias": "model-00001-of-00004.safetensors",
614
- "visual.blocks.29.mlp.fc1.weight": "model-00001-of-00004.safetensors",
615
- "visual.blocks.29.mlp.fc2.bias": "model-00001-of-00004.safetensors",
616
- "visual.blocks.29.mlp.fc2.weight": "model-00001-of-00004.safetensors",
617
- "visual.blocks.29.norm1.bias": "model-00001-of-00004.safetensors",
618
- "visual.blocks.29.norm1.weight": "model-00001-of-00004.safetensors",
619
- "visual.blocks.29.norm2.bias": "model-00001-of-00004.safetensors",
620
- "visual.blocks.29.norm2.weight": "model-00001-of-00004.safetensors",
621
- "visual.blocks.3.attn.proj.bias": "model-00001-of-00004.safetensors",
622
- "visual.blocks.3.attn.proj.weight": "model-00001-of-00004.safetensors",
623
- "visual.blocks.3.attn.qkv.bias": "model-00001-of-00004.safetensors",
624
- "visual.blocks.3.attn.qkv.weight": "model-00001-of-00004.safetensors",
625
- "visual.blocks.3.mlp.fc1.bias": "model-00001-of-00004.safetensors",
626
- "visual.blocks.3.mlp.fc1.weight": "model-00001-of-00004.safetensors",
627
- "visual.blocks.3.mlp.fc2.bias": "model-00001-of-00004.safetensors",
628
- "visual.blocks.3.mlp.fc2.weight": "model-00001-of-00004.safetensors",
629
- "visual.blocks.3.norm1.bias": "model-00001-of-00004.safetensors",
630
- "visual.blocks.3.norm1.weight": "model-00001-of-00004.safetensors",
631
- "visual.blocks.3.norm2.bias": "model-00001-of-00004.safetensors",
632
- "visual.blocks.3.norm2.weight": "model-00001-of-00004.safetensors",
633
- "visual.blocks.30.attn.proj.bias": "model-00001-of-00004.safetensors",
634
- "visual.blocks.30.attn.proj.weight": "model-00001-of-00004.safetensors",
635
- "visual.blocks.30.attn.qkv.bias": "model-00001-of-00004.safetensors",
636
- "visual.blocks.30.attn.qkv.weight": "model-00001-of-00004.safetensors",
637
- "visual.blocks.30.mlp.fc1.bias": "model-00001-of-00004.safetensors",
638
- "visual.blocks.30.mlp.fc1.weight": "model-00001-of-00004.safetensors",
639
- "visual.blocks.30.mlp.fc2.bias": "model-00001-of-00004.safetensors",
640
- "visual.blocks.30.mlp.fc2.weight": "model-00001-of-00004.safetensors",
641
- "visual.blocks.30.norm1.bias": "model-00001-of-00004.safetensors",
642
- "visual.blocks.30.norm1.weight": "model-00001-of-00004.safetensors",
643
- "visual.blocks.30.norm2.bias": "model-00001-of-00004.safetensors",
644
- "visual.blocks.30.norm2.weight": "model-00001-of-00004.safetensors",
645
- "visual.blocks.31.attn.proj.bias": "model-00001-of-00004.safetensors",
646
- "visual.blocks.31.attn.proj.weight": "model-00001-of-00004.safetensors",
647
- "visual.blocks.31.attn.qkv.bias": "model-00001-of-00004.safetensors",
648
- "visual.blocks.31.attn.qkv.weight": "model-00001-of-00004.safetensors",
649
- "visual.blocks.31.mlp.fc1.bias": "model-00001-of-00004.safetensors",
650
- "visual.blocks.31.mlp.fc1.weight": "model-00001-of-00004.safetensors",
651
- "visual.blocks.31.mlp.fc2.bias": "model-00001-of-00004.safetensors",
652
- "visual.blocks.31.mlp.fc2.weight": "model-00001-of-00004.safetensors",
653
- "visual.blocks.31.norm1.bias": "model-00001-of-00004.safetensors",
654
- "visual.blocks.31.norm1.weight": "model-00001-of-00004.safetensors",
655
- "visual.blocks.31.norm2.bias": "model-00001-of-00004.safetensors",
656
- "visual.blocks.31.norm2.weight": "model-00001-of-00004.safetensors",
657
- "visual.blocks.4.attn.proj.bias": "model-00001-of-00004.safetensors",
658
- "visual.blocks.4.attn.proj.weight": "model-00001-of-00004.safetensors",
659
- "visual.blocks.4.attn.qkv.bias": "model-00001-of-00004.safetensors",
660
- "visual.blocks.4.attn.qkv.weight": "model-00001-of-00004.safetensors",
661
- "visual.blocks.4.mlp.fc1.bias": "model-00001-of-00004.safetensors",
662
- "visual.blocks.4.mlp.fc1.weight": "model-00001-of-00004.safetensors",
663
- "visual.blocks.4.mlp.fc2.bias": "model-00001-of-00004.safetensors",
664
- "visual.blocks.4.mlp.fc2.weight": "model-00001-of-00004.safetensors",
665
- "visual.blocks.4.norm1.bias": "model-00001-of-00004.safetensors",
666
- "visual.blocks.4.norm1.weight": "model-00001-of-00004.safetensors",
667
- "visual.blocks.4.norm2.bias": "model-00001-of-00004.safetensors",
668
- "visual.blocks.4.norm2.weight": "model-00001-of-00004.safetensors",
669
- "visual.blocks.5.attn.proj.bias": "model-00001-of-00004.safetensors",
670
- "visual.blocks.5.attn.proj.weight": "model-00001-of-00004.safetensors",
671
- "visual.blocks.5.attn.qkv.bias": "model-00001-of-00004.safetensors",
672
- "visual.blocks.5.attn.qkv.weight": "model-00001-of-00004.safetensors",
673
- "visual.blocks.5.mlp.fc1.bias": "model-00001-of-00004.safetensors",
674
- "visual.blocks.5.mlp.fc1.weight": "model-00001-of-00004.safetensors",
675
- "visual.blocks.5.mlp.fc2.bias": "model-00001-of-00004.safetensors",
676
- "visual.blocks.5.mlp.fc2.weight": "model-00001-of-00004.safetensors",
677
- "visual.blocks.5.norm1.bias": "model-00001-of-00004.safetensors",
678
- "visual.blocks.5.norm1.weight": "model-00001-of-00004.safetensors",
679
- "visual.blocks.5.norm2.bias": "model-00001-of-00004.safetensors",
680
- "visual.blocks.5.norm2.weight": "model-00001-of-00004.safetensors",
681
- "visual.blocks.6.attn.proj.bias": "model-00001-of-00004.safetensors",
682
- "visual.blocks.6.attn.proj.weight": "model-00001-of-00004.safetensors",
683
- "visual.blocks.6.attn.qkv.bias": "model-00001-of-00004.safetensors",
684
- "visual.blocks.6.attn.qkv.weight": "model-00001-of-00004.safetensors",
685
- "visual.blocks.6.mlp.fc1.bias": "model-00001-of-00004.safetensors",
686
- "visual.blocks.6.mlp.fc1.weight": "model-00001-of-00004.safetensors",
687
- "visual.blocks.6.mlp.fc2.bias": "model-00001-of-00004.safetensors",
688
- "visual.blocks.6.mlp.fc2.weight": "model-00001-of-00004.safetensors",
689
- "visual.blocks.6.norm1.bias": "model-00001-of-00004.safetensors",
690
- "visual.blocks.6.norm1.weight": "model-00001-of-00004.safetensors",
691
- "visual.blocks.6.norm2.bias": "model-00001-of-00004.safetensors",
692
- "visual.blocks.6.norm2.weight": "model-00001-of-00004.safetensors",
693
- "visual.blocks.7.attn.proj.bias": "model-00001-of-00004.safetensors",
694
- "visual.blocks.7.attn.proj.weight": "model-00001-of-00004.safetensors",
695
- "visual.blocks.7.attn.qkv.bias": "model-00001-of-00004.safetensors",
696
- "visual.blocks.7.attn.qkv.weight": "model-00001-of-00004.safetensors",
697
- "visual.blocks.7.mlp.fc1.bias": "model-00001-of-00004.safetensors",
698
- "visual.blocks.7.mlp.fc1.weight": "model-00001-of-00004.safetensors",
699
- "visual.blocks.7.mlp.fc2.bias": "model-00001-of-00004.safetensors",
700
- "visual.blocks.7.mlp.fc2.weight": "model-00001-of-00004.safetensors",
701
- "visual.blocks.7.norm1.bias": "model-00001-of-00004.safetensors",
702
- "visual.blocks.7.norm1.weight": "model-00001-of-00004.safetensors",
703
- "visual.blocks.7.norm2.bias": "model-00001-of-00004.safetensors",
704
- "visual.blocks.7.norm2.weight": "model-00001-of-00004.safetensors",
705
- "visual.blocks.8.attn.proj.bias": "model-00001-of-00004.safetensors",
706
- "visual.blocks.8.attn.proj.weight": "model-00001-of-00004.safetensors",
707
- "visual.blocks.8.attn.qkv.bias": "model-00001-of-00004.safetensors",
708
- "visual.blocks.8.attn.qkv.weight": "model-00001-of-00004.safetensors",
709
- "visual.blocks.8.mlp.fc1.bias": "model-00001-of-00004.safetensors",
710
- "visual.blocks.8.mlp.fc1.weight": "model-00001-of-00004.safetensors",
711
- "visual.blocks.8.mlp.fc2.bias": "model-00001-of-00004.safetensors",
712
- "visual.blocks.8.mlp.fc2.weight": "model-00001-of-00004.safetensors",
713
- "visual.blocks.8.norm1.bias": "model-00001-of-00004.safetensors",
714
- "visual.blocks.8.norm1.weight": "model-00001-of-00004.safetensors",
715
- "visual.blocks.8.norm2.bias": "model-00001-of-00004.safetensors",
716
- "visual.blocks.8.norm2.weight": "model-00001-of-00004.safetensors",
717
- "visual.blocks.9.attn.proj.bias": "model-00001-of-00004.safetensors",
718
- "visual.blocks.9.attn.proj.weight": "model-00001-of-00004.safetensors",
719
- "visual.blocks.9.attn.qkv.bias": "model-00001-of-00004.safetensors",
720
- "visual.blocks.9.attn.qkv.weight": "model-00001-of-00004.safetensors",
721
- "visual.blocks.9.mlp.fc1.bias": "model-00001-of-00004.safetensors",
722
- "visual.blocks.9.mlp.fc1.weight": "model-00001-of-00004.safetensors",
723
- "visual.blocks.9.mlp.fc2.bias": "model-00001-of-00004.safetensors",
724
- "visual.blocks.9.mlp.fc2.weight": "model-00001-of-00004.safetensors",
725
- "visual.blocks.9.norm1.bias": "model-00001-of-00004.safetensors",
726
- "visual.blocks.9.norm1.weight": "model-00001-of-00004.safetensors",
727
- "visual.blocks.9.norm2.bias": "model-00001-of-00004.safetensors",
728
- "visual.blocks.9.norm2.weight": "model-00001-of-00004.safetensors",
729
- "visual.merger.ln_q.bias": "model-00001-of-00004.safetensors",
730
- "visual.merger.ln_q.weight": "model-00001-of-00004.safetensors",
731
- "visual.merger.mlp.0.bias": "model-00001-of-00004.safetensors",
732
- "visual.merger.mlp.0.weight": "model-00001-of-00004.safetensors",
733
- "visual.merger.mlp.2.bias": "model-00001-of-00004.safetensors",
734
- "visual.merger.mlp.2.weight": "model-00001-of-00004.safetensors",
735
- "visual.patch_embed.proj.weight": "model-00001-of-00004.safetensors"
736
- }
737
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/preprocessor_config.json DELETED
@@ -1,29 +0,0 @@
1
- {
2
- "do_convert_rgb": true,
3
- "do_normalize": true,
4
- "do_rescale": true,
5
- "do_resize": true,
6
- "image_mean": [
7
- 0.48145466,
8
- 0.4578275,
9
- 0.40821073
10
- ],
11
- "image_processor_type": "Qwen2VLImageProcessor",
12
- "image_std": [
13
- 0.26862954,
14
- 0.26130258,
15
- 0.27577711
16
- ],
17
- "max_pixels": 12845056,
18
- "merge_size": 2,
19
- "min_pixels": 3136,
20
- "patch_size": 14,
21
- "processor_class": "Qwen2VLProcessor",
22
- "resample": 3,
23
- "rescale_factor": 0.00392156862745098,
24
- "size": {
25
- "max_pixels": 12845056,
26
- "min_pixels": 3136
27
- },
28
- "temporal_patch_size": 2
29
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/special_tokens_map.json DELETED
@@ -1,31 +0,0 @@
1
- {
2
- "additional_special_tokens": [
3
- "<|im_start|>",
4
- "<|im_end|>",
5
- "<|object_ref_start|>",
6
- "<|object_ref_end|>",
7
- "<|box_start|>",
8
- "<|box_end|>",
9
- "<|quad_start|>",
10
- "<|quad_end|>",
11
- "<|vision_start|>",
12
- "<|vision_end|>",
13
- "<|vision_pad|>",
14
- "<|image_pad|>",
15
- "<|video_pad|>"
16
- ],
17
- "eos_token": {
18
- "content": "<|im_end|>",
19
- "lstrip": false,
20
- "normalized": false,
21
- "rstrip": false,
22
- "single_word": false
23
- },
24
- "pad_token": {
25
- "content": "<|endoftext|>",
26
- "lstrip": false,
27
- "normalized": false,
28
- "rstrip": false,
29
- "single_word": false
30
- }
31
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/tokenizer.json DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:88a3a6fcb80132f76da8aa40cdc3fccd7e5d8468ef15421f5b0c2715e85217d2
3
- size 11420538
 
 
 
 
checkpoint-160/tokenizer_config.json DELETED
@@ -1,145 +0,0 @@
1
- {
2
- "add_prefix_space": false,
3
- "added_tokens_decoder": {
4
- "151643": {
5
- "content": "<|endoftext|>",
6
- "lstrip": false,
7
- "normalized": false,
8
- "rstrip": false,
9
- "single_word": false,
10
- "special": true
11
- },
12
- "151644": {
13
- "content": "<|im_start|>",
14
- "lstrip": false,
15
- "normalized": false,
16
- "rstrip": false,
17
- "single_word": false,
18
- "special": true
19
- },
20
- "151645": {
21
- "content": "<|im_end|>",
22
- "lstrip": false,
23
- "normalized": false,
24
- "rstrip": false,
25
- "single_word": false,
26
- "special": true
27
- },
28
- "151646": {
29
- "content": "<|object_ref_start|>",
30
- "lstrip": false,
31
- "normalized": false,
32
- "rstrip": false,
33
- "single_word": false,
34
- "special": true
35
- },
36
- "151647": {
37
- "content": "<|object_ref_end|>",
38
- "lstrip": false,
39
- "normalized": false,
40
- "rstrip": false,
41
- "single_word": false,
42
- "special": true
43
- },
44
- "151648": {
45
- "content": "<|box_start|>",
46
- "lstrip": false,
47
- "normalized": false,
48
- "rstrip": false,
49
- "single_word": false,
50
- "special": true
51
- },
52
- "151649": {
53
- "content": "<|box_end|>",
54
- "lstrip": false,
55
- "normalized": false,
56
- "rstrip": false,
57
- "single_word": false,
58
- "special": true
59
- },
60
- "151650": {
61
- "content": "<|quad_start|>",
62
- "lstrip": false,
63
- "normalized": false,
64
- "rstrip": false,
65
- "single_word": false,
66
- "special": true
67
- },
68
- "151651": {
69
- "content": "<|quad_end|>",
70
- "lstrip": false,
71
- "normalized": false,
72
- "rstrip": false,
73
- "single_word": false,
74
- "special": true
75
- },
76
- "151652": {
77
- "content": "<|vision_start|>",
78
- "lstrip": false,
79
- "normalized": false,
80
- "rstrip": false,
81
- "single_word": false,
82
- "special": true
83
- },
84
- "151653": {
85
- "content": "<|vision_end|>",
86
- "lstrip": false,
87
- "normalized": false,
88
- "rstrip": false,
89
- "single_word": false,
90
- "special": true
91
- },
92
- "151654": {
93
- "content": "<|vision_pad|>",
94
- "lstrip": false,
95
- "normalized": false,
96
- "rstrip": false,
97
- "single_word": false,
98
- "special": true
99
- },
100
- "151655": {
101
- "content": "<|image_pad|>",
102
- "lstrip": false,
103
- "normalized": false,
104
- "rstrip": false,
105
- "single_word": false,
106
- "special": true
107
- },
108
- "151656": {
109
- "content": "<|video_pad|>",
110
- "lstrip": false,
111
- "normalized": false,
112
- "rstrip": false,
113
- "single_word": false,
114
- "special": true
115
- }
116
- },
117
- "additional_special_tokens": [
118
- "<|im_start|>",
119
- "<|im_end|>",
120
- "<|object_ref_start|>",
121
- "<|object_ref_end|>",
122
- "<|box_start|>",
123
- "<|box_end|>",
124
- "<|quad_start|>",
125
- "<|quad_end|>",
126
- "<|vision_start|>",
127
- "<|vision_end|>",
128
- "<|vision_pad|>",
129
- "<|image_pad|>",
130
- "<|video_pad|>"
131
- ],
132
- "bos_token": null,
133
- "chat_template": "{% set image_count = namespace(value=0) %}{% set video_count = namespace(value=0) %}{% for message in messages %}{% if loop.first and message['role'] != 'system' %}<|im_start|>system\nYou are a helpful assistant.<|im_end|>\n{% endif %}<|im_start|>{{ message['role'] }}\n{% if message['content'] is string %}{{ message['content'] }}<|im_end|>\n{% else %}{% for content in message['content'] %}{% if content['type'] == 'image' or 'image' in content or 'image_url' in content %}{% set image_count.value = image_count.value + 1 %}{% if add_vision_id %}Picture {{ image_count.value }}: {% endif %}<|vision_start|><|image_pad|><|vision_end|>{% elif content['type'] == 'video' or 'video' in content %}{% set video_count.value = video_count.value + 1 %}{% if add_vision_id %}Video {{ video_count.value }}: {% endif %}<|vision_start|><|video_pad|><|vision_end|>{% elif 'text' in content %}{{ content['text'] }}{% endif %}{% endfor %}<|im_end|>\n{% endif %}{% endfor %}{% if add_generation_prompt %}<|im_start|>assistant\n{% endif %}",
134
- "clean_up_tokenization_spaces": false,
135
- "eos_token": "<|im_end|>",
136
- "errors": "replace",
137
- "extra_special_tokens": {},
138
- "model_max_length": 32768,
139
- "pad_token": "<|endoftext|>",
140
- "padding_side": "left",
141
- "processor_class": "Qwen2VLProcessor",
142
- "split_special_tokens": false,
143
- "tokenizer_class": "Qwen2Tokenizer",
144
- "unk_token": null
145
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/trainer_state.json DELETED
@@ -1,2273 +0,0 @@
1
- {
2
- "best_metric": null,
3
- "best_model_checkpoint": null,
4
- "epoch": 0.13733905579399142,
5
- "eval_steps": 500,
6
- "global_step": 160,
7
- "is_hyper_param_search": false,
8
- "is_local_process_zero": true,
9
- "is_world_process_zero": true,
10
- "log_history": [
11
- {
12
- "advantages": 1.862645149230957e-08,
13
- "completion_length": 129.0625,
14
- "epoch": 0.0008583690987124463,
15
- "grad_norm": 174.6410675048828,
16
- "kl": 4.34375,
17
- "learning_rate": 9.991416309012877e-07,
18
- "loss": 0.434,
19
- "reward": 0.15625,
20
- "reward_mean": 0.15625,
21
- "reward_std": 0.3198433816432953,
22
- "rewards/accuracy_reward": 0.15625,
23
- "step": 1
24
- },
25
- {
26
- "advantages": 1.30385160446167e-08,
27
- "completion_length": 138.0625,
28
- "epoch": 0.0017167381974248926,
29
- "grad_norm": 332.6672668457031,
30
- "kl": 5.21875,
31
- "learning_rate": 9.982832618025751e-07,
32
- "loss": 0.5223,
33
- "reward": 0.34375,
34
- "reward_mean": 0.34375,
35
- "reward_std": 0.4628904461860657,
36
- "rewards/accuracy_reward": 0.34375,
37
- "step": 2
38
- },
39
- {
40
- "advantages": 2.421438694000244e-08,
41
- "completion_length": 117.4375,
42
- "epoch": 0.002575107296137339,
43
- "grad_norm": 166.6440887451172,
44
- "kl": 3.5,
45
- "learning_rate": 9.974248927038626e-07,
46
- "loss": 0.3496,
47
- "reward": 0.21875,
48
- "reward_mean": 0.21875,
49
- "reward_std": 0.3608423173427582,
50
- "rewards/accuracy_reward": 0.21875,
51
- "step": 3
52
- },
53
- {
54
- "advantages": 7.450580596923828e-09,
55
- "completion_length": 126.59375,
56
- "epoch": 0.0034334763948497852,
57
- "grad_norm": 111.89189910888672,
58
- "kl": 1.5078125,
59
- "learning_rate": 9.965665236051501e-07,
60
- "loss": 0.1505,
61
- "reward": 0.0625,
62
- "reward_mean": 0.0625,
63
- "reward_std": 0.1767766922712326,
64
- "rewards/accuracy_reward": 0.0625,
65
- "step": 4
66
- },
67
- {
68
- "advantages": 1.4901161193847656e-08,
69
- "completion_length": 164.875,
70
- "epoch": 0.004291845493562232,
71
- "grad_norm": 38.75446701049805,
72
- "kl": 1.359375,
73
- "learning_rate": 9.957081545064378e-07,
74
- "loss": 0.1359,
75
- "reward": 0.25,
76
- "reward_mean": 0.25,
77
- "reward_std": 0.4261348247528076,
78
- "rewards/accuracy_reward": 0.25,
79
- "step": 5
80
- },
81
- {
82
- "advantages": 2.0489096641540527e-08,
83
- "completion_length": 136.9375,
84
- "epoch": 0.005150214592274678,
85
- "grad_norm": 278.40093994140625,
86
- "kl": 4.5,
87
- "learning_rate": 9.948497854077253e-07,
88
- "loss": 0.4497,
89
- "reward": 0.1875,
90
- "reward_mean": 0.1875,
91
- "reward_std": 0.3945523500442505,
92
- "rewards/accuracy_reward": 0.1875,
93
- "step": 6
94
- },
95
- {
96
- "advantages": 2.60770320892334e-08,
97
- "completion_length": 150.8125,
98
- "epoch": 0.006008583690987125,
99
- "grad_norm": 189.5176544189453,
100
- "kl": 2.75,
101
- "learning_rate": 9.939914163090128e-07,
102
- "loss": 0.2753,
103
- "reward": 0.375,
104
- "reward_mean": 0.375,
105
- "reward_std": 0.5081326961517334,
106
- "rewards/accuracy_reward": 0.375,
107
- "step": 7
108
- },
109
- {
110
- "advantages": 1.30385160446167e-08,
111
- "completion_length": 135.5,
112
- "epoch": 0.0068669527896995704,
113
- "grad_norm": 1704.9647216796875,
114
- "kl": 4.34375,
115
- "learning_rate": 9.931330472103003e-07,
116
- "loss": 0.4358,
117
- "reward": 0.125,
118
- "reward_mean": 0.125,
119
- "reward_std": 0.2177756428718567,
120
- "rewards/accuracy_reward": 0.125,
121
- "step": 8
122
- },
123
- {
124
- "advantages": 1.1175870895385742e-08,
125
- "completion_length": 116.96875,
126
- "epoch": 0.007725321888412017,
127
- "grad_norm": 26.27825927734375,
128
- "kl": 0.8671875,
129
- "learning_rate": 9.92274678111588e-07,
130
- "loss": 0.0868,
131
- "reward": 0.09375,
132
- "reward_mean": 0.09375,
133
- "reward_std": 0.2651650309562683,
134
- "rewards/accuracy_reward": 0.09375,
135
- "step": 9
136
- },
137
- {
138
- "advantages": -9.313225746154785e-09,
139
- "completion_length": 105.625,
140
- "epoch": 0.008583690987124463,
141
- "grad_norm": 115.19660949707031,
142
- "kl": 1.28125,
143
- "learning_rate": 9.914163090128755e-07,
144
- "loss": 0.1278,
145
- "reward": 0.53125,
146
- "reward_mean": 0.53125,
147
- "reward_std": 0.521792471408844,
148
- "rewards/accuracy_reward": 0.53125,
149
- "step": 10
150
- },
151
- {
152
- "advantages": 1.30385160446167e-08,
153
- "completion_length": 125.78125,
154
- "epoch": 0.00944206008583691,
155
- "grad_norm": 242.439453125,
156
- "kl": 3.75,
157
- "learning_rate": 9.90557939914163e-07,
158
- "loss": 0.3733,
159
- "reward": 0.125,
160
- "reward_mean": 0.125,
161
- "reward_std": 0.2177756428718567,
162
- "rewards/accuracy_reward": 0.125,
163
- "step": 11
164
- },
165
- {
166
- "advantages": 2.421438694000244e-08,
167
- "completion_length": 140.65625,
168
- "epoch": 0.010300429184549357,
169
- "grad_norm": 57.61454772949219,
170
- "kl": 1.5625,
171
- "learning_rate": 9.896995708154506e-07,
172
- "loss": 0.1566,
173
- "reward": 0.21875,
174
- "reward_mean": 0.21875,
175
- "reward_std": 0.4218915104866028,
176
- "rewards/accuracy_reward": 0.21875,
177
- "step": 12
178
- },
179
- {
180
- "advantages": 1.4901161193847656e-08,
181
- "completion_length": 122.3125,
182
- "epoch": 0.011158798283261802,
183
- "grad_norm": 9.250263214111328,
184
- "kl": 0.416015625,
185
- "learning_rate": 9.888412017167381e-07,
186
- "loss": 0.0417,
187
- "reward": 0.125,
188
- "reward_mean": 0.125,
189
- "reward_std": 0.292504221200943,
190
- "rewards/accuracy_reward": 0.125,
191
- "step": 13
192
- },
193
- {
194
- "advantages": 7.450580596923828e-09,
195
- "completion_length": 134.34375,
196
- "epoch": 0.01201716738197425,
197
- "grad_norm": 29.975547790527344,
198
- "kl": 0.76171875,
199
- "learning_rate": 9.879828326180258e-07,
200
- "loss": 0.0763,
201
- "reward": 0.1875,
202
- "reward_mean": 0.1875,
203
- "reward_std": 0.3104073107242584,
204
- "rewards/accuracy_reward": 0.1875,
205
- "step": 14
206
- },
207
- {
208
- "advantages": -5.587935447692871e-09,
209
- "completion_length": 136.5,
210
- "epoch": 0.012875536480686695,
211
- "grad_norm": 35.626949310302734,
212
- "kl": 0.9140625,
213
- "learning_rate": 9.871244635193133e-07,
214
- "loss": 0.0915,
215
- "reward": 0.3125,
216
- "reward_mean": 0.3125,
217
- "reward_std": 0.3514062464237213,
218
- "rewards/accuracy_reward": 0.3125,
219
- "step": 15
220
- },
221
- {
222
- "advantages": 2.421438694000244e-08,
223
- "completion_length": 133.84375,
224
- "epoch": 0.013733905579399141,
225
- "grad_norm": 127.25942993164062,
226
- "kl": 1.765625,
227
- "learning_rate": 9.862660944206008e-07,
228
- "loss": 0.176,
229
- "reward": 0.21875,
230
- "reward_mean": 0.21875,
231
- "reward_std": 0.3608423173427582,
232
- "rewards/accuracy_reward": 0.21875,
233
- "step": 16
234
- },
235
- {
236
- "advantages": 2.0489096641540527e-08,
237
- "completion_length": 110.25,
238
- "epoch": 0.014592274678111588,
239
- "grad_norm": 51.83127975463867,
240
- "kl": 0.52734375,
241
- "learning_rate": 9.854077253218883e-07,
242
- "loss": 0.0526,
243
- "reward": 0.1875,
244
- "reward_mean": 0.1875,
245
- "reward_std": 0.3335031569004059,
246
- "rewards/accuracy_reward": 0.1875,
247
- "step": 17
248
- },
249
- {
250
- "advantages": 1.6763806343078613e-08,
251
- "completion_length": 132.71875,
252
- "epoch": 0.015450643776824034,
253
- "grad_norm": 71.73090362548828,
254
- "kl": 1.375,
255
- "learning_rate": 9.84549356223176e-07,
256
- "loss": 0.1369,
257
- "reward": 0.4375,
258
- "reward_mean": 0.4375,
259
- "reward_std": 0.5260357856750488,
260
- "rewards/accuracy_reward": 0.4375,
261
- "step": 18
262
- },
263
- {
264
- "advantages": 1.4901161193847656e-08,
265
- "completion_length": 139.96875,
266
- "epoch": 0.01630901287553648,
267
- "grad_norm": 47.92875289916992,
268
- "kl": 0.92578125,
269
- "learning_rate": 9.836909871244635e-07,
270
- "loss": 0.0925,
271
- "reward": 0.15625,
272
- "reward_mean": 0.15625,
273
- "reward_std": 0.24511480331420898,
274
- "rewards/accuracy_reward": 0.15625,
275
- "step": 19
276
- },
277
- {
278
- "advantages": 2.60770320892334e-08,
279
- "completion_length": 139.8125,
280
- "epoch": 0.017167381974248927,
281
- "grad_norm": 32.31055450439453,
282
- "kl": 0.703125,
283
- "learning_rate": 9.82832618025751e-07,
284
- "loss": 0.0701,
285
- "reward": 0.25,
286
- "reward_mean": 0.25,
287
- "reward_std": 0.4355512857437134,
288
- "rewards/accuracy_reward": 0.25,
289
- "step": 20
290
- },
291
- {
292
- "advantages": 1.1175870895385742e-08,
293
- "completion_length": 122.3125,
294
- "epoch": 0.018025751072961373,
295
- "grad_norm": 12.536384582519531,
296
- "kl": 0.423828125,
297
- "learning_rate": 9.819742489270387e-07,
298
- "loss": 0.0424,
299
- "reward": 0.09375,
300
- "reward_mean": 0.09375,
301
- "reward_std": 0.2651650309562683,
302
- "rewards/accuracy_reward": 0.09375,
303
- "step": 21
304
- },
305
- {
306
- "advantages": 1.4901161193847656e-08,
307
- "completion_length": 155.6875,
308
- "epoch": 0.01888412017167382,
309
- "grad_norm": 9.963027954101562,
310
- "kl": 0.421875,
311
- "learning_rate": 9.811158798283261e-07,
312
- "loss": 0.0421,
313
- "reward": 0.125,
314
- "reward_mean": 0.125,
315
- "reward_std": 0.3535533845424652,
316
- "rewards/accuracy_reward": 0.125,
317
- "step": 22
318
- },
319
- {
320
- "advantages": 1.1175870895385742e-08,
321
- "completion_length": 137.15625,
322
- "epoch": 0.019742489270386267,
323
- "grad_norm": 6.500565052032471,
324
- "kl": 0.25,
325
- "learning_rate": 9.802575107296136e-07,
326
- "loss": 0.025,
327
- "reward": 0.09375,
328
- "reward_mean": 0.09375,
329
- "reward_std": 0.2651650309562683,
330
- "rewards/accuracy_reward": 0.09375,
331
- "step": 23
332
- },
333
- {
334
- "advantages": 5.587935447692871e-09,
335
- "completion_length": 143.4375,
336
- "epoch": 0.020600858369098713,
337
- "grad_norm": 4.852046012878418,
338
- "kl": 0.208984375,
339
- "learning_rate": 9.793991416309011e-07,
340
- "loss": 0.021,
341
- "reward": 0.28125,
342
- "reward_mean": 0.28125,
343
- "reward_std": 0.3608423173427582,
344
- "rewards/accuracy_reward": 0.28125,
345
- "step": 24
346
- },
347
- {
348
- "advantages": 1.4901161193847656e-08,
349
- "completion_length": 130.03125,
350
- "epoch": 0.02145922746781116,
351
- "grad_norm": 11.683233261108398,
352
- "kl": 0.396484375,
353
- "learning_rate": 9.785407725321888e-07,
354
- "loss": 0.0395,
355
- "reward": 0.125,
356
- "reward_mean": 0.125,
357
- "reward_std": 0.2925041913986206,
358
- "rewards/accuracy_reward": 0.125,
359
- "step": 25
360
- },
361
- {
362
- "advantages": 3.166496753692627e-08,
363
- "completion_length": 150.59375,
364
- "epoch": 0.022317596566523604,
365
- "grad_norm": 7.088483810424805,
366
- "kl": 0.29296875,
367
- "learning_rate": 9.776824034334763e-07,
368
- "loss": 0.0294,
369
- "reward": 0.28125,
370
- "reward_mean": 0.28125,
371
- "reward_std": 0.4765698313713074,
372
- "rewards/accuracy_reward": 0.28125,
373
- "step": 26
374
- },
375
- {
376
- "advantages": 2.9802322387695312e-08,
377
- "completion_length": 125.1875,
378
- "epoch": 0.02317596566523605,
379
- "grad_norm": 9.142675399780273,
380
- "kl": 0.41015625,
381
- "learning_rate": 9.76824034334764e-07,
382
- "loss": 0.0412,
383
- "reward": 0.28125,
384
- "reward_mean": 0.28125,
385
- "reward_std": 0.4628904461860657,
386
- "rewards/accuracy_reward": 0.28125,
387
- "step": 27
388
- },
389
- {
390
- "advantages": 1.30385160446167e-08,
391
- "completion_length": 146.21875,
392
- "epoch": 0.0240343347639485,
393
- "grad_norm": 7.925714492797852,
394
- "kl": 0.22265625,
395
- "learning_rate": 9.759656652360515e-07,
396
- "loss": 0.0222,
397
- "reward": 0.375,
398
- "reward_mean": 0.375,
399
- "reward_std": 0.47655022144317627,
400
- "rewards/accuracy_reward": 0.375,
401
- "step": 28
402
- },
403
- {
404
- "advantages": 1.1175870895385742e-08,
405
- "completion_length": 156.46875,
406
- "epoch": 0.024892703862660945,
407
- "grad_norm": 9.395242691040039,
408
- "kl": 0.3984375,
409
- "learning_rate": 9.75107296137339e-07,
410
- "loss": 0.0397,
411
- "reward": 0.09375,
412
- "reward_mean": 0.09375,
413
- "reward_std": 0.2041158676147461,
414
- "rewards/accuracy_reward": 0.09375,
415
- "step": 29
416
- },
417
- {
418
- "advantages": 2.9802322387695312e-08,
419
- "completion_length": 148.65625,
420
- "epoch": 0.02575107296137339,
421
- "grad_norm": 8.077410697937012,
422
- "kl": 0.2890625,
423
- "learning_rate": 9.742489270386267e-07,
424
- "loss": 0.0289,
425
- "reward": 0.28125,
426
- "reward_mean": 0.28125,
427
- "reward_std": 0.4628904461860657,
428
- "rewards/accuracy_reward": 0.28125,
429
- "step": 30
430
- },
431
- {
432
- "advantages": 1.6763806343078613e-08,
433
- "completion_length": 137.3125,
434
- "epoch": 0.026609442060085836,
435
- "grad_norm": 8.573564529418945,
436
- "kl": 0.263671875,
437
- "learning_rate": 9.733905579399142e-07,
438
- "loss": 0.0264,
439
- "reward": 0.28125,
440
- "reward_mean": 0.28125,
441
- "reward_std": 0.378745436668396,
442
- "rewards/accuracy_reward": 0.28125,
443
- "step": 31
444
- },
445
- {
446
- "advantages": 2.9802322387695312e-08,
447
- "completion_length": 148.25,
448
- "epoch": 0.027467811158798282,
449
- "grad_norm": 6.53264045715332,
450
- "kl": 0.265625,
451
- "learning_rate": 9.725321888412016e-07,
452
- "loss": 0.0265,
453
- "reward": 0.28125,
454
- "reward_mean": 0.28125,
455
- "reward_std": 0.4628904461860657,
456
- "rewards/accuracy_reward": 0.28125,
457
- "step": 32
458
- },
459
- {
460
- "advantages": 3.166496753692627e-08,
461
- "completion_length": 133.0,
462
- "epoch": 0.02832618025751073,
463
- "grad_norm": 8.72734260559082,
464
- "kl": 0.36328125,
465
- "learning_rate": 9.716738197424891e-07,
466
- "loss": 0.0363,
467
- "reward": 0.3125,
468
- "reward_mean": 0.3125,
469
- "reward_std": 0.47655022144317627,
470
- "rewards/accuracy_reward": 0.3125,
471
- "step": 33
472
- },
473
- {
474
- "advantages": 9.313225746154785e-09,
475
- "completion_length": 133.3125,
476
- "epoch": 0.029184549356223177,
477
- "grad_norm": 11.700161933898926,
478
- "kl": 0.30859375,
479
- "learning_rate": 9.708154506437768e-07,
480
- "loss": 0.031,
481
- "reward": 0.09375,
482
- "reward_mean": 0.09375,
483
- "reward_std": 0.1293872892856598,
484
- "rewards/accuracy_reward": 0.09375,
485
- "step": 34
486
- },
487
- {
488
- "advantages": 2.9802322387695312e-08,
489
- "completion_length": 145.5625,
490
- "epoch": 0.030042918454935622,
491
- "grad_norm": 25.358783721923828,
492
- "kl": 0.486328125,
493
- "learning_rate": 9.699570815450643e-07,
494
- "loss": 0.0486,
495
- "reward": 0.28125,
496
- "reward_mean": 0.28125,
497
- "reward_std": 0.4628904461860657,
498
- "rewards/accuracy_reward": 0.28125,
499
- "step": 35
500
- },
501
- {
502
- "advantages": 1.862645149230957e-08,
503
- "completion_length": 132.625,
504
- "epoch": 0.030901287553648068,
505
- "grad_norm": 13.985993385314941,
506
- "kl": 0.298828125,
507
- "learning_rate": 9.690987124463518e-07,
508
- "loss": 0.0298,
509
- "reward": 0.15625,
510
- "reward_mean": 0.15625,
511
- "reward_std": 0.3808925747871399,
512
- "rewards/accuracy_reward": 0.15625,
513
- "step": 36
514
- },
515
- {
516
- "advantages": 7.450580596923828e-09,
517
- "completion_length": 160.90625,
518
- "epoch": 0.03175965665236052,
519
- "grad_norm": 18.302053451538086,
520
- "kl": 0.447265625,
521
- "learning_rate": 9.682403433476395e-07,
522
- "loss": 0.0447,
523
- "reward": 0.0625,
524
- "reward_mean": 0.0625,
525
- "reward_std": 0.1767766922712326,
526
- "rewards/accuracy_reward": 0.0625,
527
- "step": 37
528
- },
529
- {
530
- "advantages": 1.1175870895385742e-08,
531
- "completion_length": 144.875,
532
- "epoch": 0.03261802575107296,
533
- "grad_norm": 4.443456172943115,
534
- "kl": 0.248046875,
535
- "learning_rate": 9.67381974248927e-07,
536
- "loss": 0.0247,
537
- "reward": 0.09375,
538
- "reward_mean": 0.09375,
539
- "reward_std": 0.2041158676147461,
540
- "rewards/accuracy_reward": 0.09375,
541
- "step": 38
542
- },
543
- {
544
- "advantages": 1.6763806343078613e-08,
545
- "completion_length": 157.96875,
546
- "epoch": 0.03347639484978541,
547
- "grad_norm": 6.555863380432129,
548
- "kl": 0.279296875,
549
- "learning_rate": 9.665236051502147e-07,
550
- "loss": 0.0279,
551
- "reward": 0.15625,
552
- "reward_mean": 0.15625,
553
- "reward_std": 0.3061639666557312,
554
- "rewards/accuracy_reward": 0.15625,
555
- "step": 39
556
- },
557
- {
558
- "advantages": 3.725290298461914e-09,
559
- "completion_length": 131.03125,
560
- "epoch": 0.034334763948497854,
561
- "grad_norm": 7.260156631469727,
562
- "kl": 0.1865234375,
563
- "learning_rate": 9.656652360515022e-07,
564
- "loss": 0.0186,
565
- "reward": 0.28125,
566
- "reward_mean": 0.28125,
567
- "reward_std": 0.35564959049224854,
568
- "rewards/accuracy_reward": 0.28125,
569
- "step": 40
570
- },
571
- {
572
- "advantages": 1.6763806343078613e-08,
573
- "completion_length": 155.75,
574
- "epoch": 0.0351931330472103,
575
- "grad_norm": 9.163714408874512,
576
- "kl": 0.19140625,
577
- "learning_rate": 9.648068669527897e-07,
578
- "loss": 0.0191,
579
- "reward": 0.15625,
580
- "reward_mean": 0.15625,
581
- "reward_std": 0.3061639666557312,
582
- "rewards/accuracy_reward": 0.15625,
583
- "step": 41
584
- },
585
- {
586
- "advantages": 1.862645149230957e-08,
587
- "completion_length": 153.09375,
588
- "epoch": 0.036051502145922745,
589
- "grad_norm": 9.94912052154541,
590
- "kl": 0.345703125,
591
- "learning_rate": 9.639484978540771e-07,
592
- "loss": 0.0345,
593
- "reward": 0.15625,
594
- "reward_mean": 0.15625,
595
- "reward_std": 0.3198433816432953,
596
- "rewards/accuracy_reward": 0.15625,
597
- "step": 42
598
- },
599
- {
600
- "advantages": 2.0489096641540527e-08,
601
- "completion_length": 163.28125,
602
- "epoch": 0.03690987124463519,
603
- "grad_norm": 4.065970420837402,
604
- "kl": 0.2119140625,
605
- "learning_rate": 9.630901287553648e-07,
606
- "loss": 0.0212,
607
- "reward": 0.1875,
608
- "reward_mean": 0.1875,
609
- "reward_std": 0.3335031569004059,
610
- "rewards/accuracy_reward": 0.1875,
611
- "step": 43
612
- },
613
- {
614
- "advantages": 2.60770320892334e-08,
615
- "completion_length": 149.03125,
616
- "epoch": 0.03776824034334764,
617
- "grad_norm": 6.335684776306152,
618
- "kl": 0.330078125,
619
- "learning_rate": 9.622317596566523e-07,
620
- "loss": 0.033,
621
- "reward": 0.25,
622
- "reward_mean": 0.25,
623
- "reward_std": 0.3745020925998688,
624
- "rewards/accuracy_reward": 0.25,
625
- "step": 44
626
- },
627
- {
628
- "advantages": 1.862645149230957e-08,
629
- "completion_length": 168.0625,
630
- "epoch": 0.03862660944206009,
631
- "grad_norm": 5.854466438293457,
632
- "kl": 0.224609375,
633
- "learning_rate": 9.613733905579398e-07,
634
- "loss": 0.0225,
635
- "reward": 0.15625,
636
- "reward_mean": 0.15625,
637
- "reward_std": 0.3808925747871399,
638
- "rewards/accuracy_reward": 0.15625,
639
- "step": 45
640
- },
641
- {
642
- "advantages": 2.0489096641540527e-08,
643
- "completion_length": 159.3125,
644
- "epoch": 0.039484978540772535,
645
- "grad_norm": 59.691341400146484,
646
- "kl": 0.67578125,
647
- "learning_rate": 9.605150214592275e-07,
648
- "loss": 0.0677,
649
- "reward": 0.1875,
650
- "reward_mean": 0.1875,
651
- "reward_std": 0.3945523500442505,
652
- "rewards/accuracy_reward": 0.1875,
653
- "step": 46
654
- },
655
- {
656
- "advantages": 2.60770320892334e-08,
657
- "completion_length": 149.21875,
658
- "epoch": 0.04034334763948498,
659
- "grad_norm": 5.02371883392334,
660
- "kl": 0.1845703125,
661
- "learning_rate": 9.59656652360515e-07,
662
- "loss": 0.0185,
663
- "reward": 0.21875,
664
- "reward_mean": 0.21875,
665
- "reward_std": 0.4355708956718445,
666
- "rewards/accuracy_reward": 0.21875,
667
- "step": 47
668
- },
669
- {
670
- "advantages": 2.421438694000244e-08,
671
- "completion_length": 167.875,
672
- "epoch": 0.041201716738197426,
673
- "grad_norm": 4.878015041351318,
674
- "kl": 0.185546875,
675
- "learning_rate": 9.587982832618025e-07,
676
- "loss": 0.0185,
677
- "reward": 0.21875,
678
- "reward_mean": 0.21875,
679
- "reward_std": 0.4218915104866028,
680
- "rewards/accuracy_reward": 0.21875,
681
- "step": 48
682
- },
683
- {
684
- "advantages": 2.0489096641540527e-08,
685
- "completion_length": 152.96875,
686
- "epoch": 0.04206008583690987,
687
- "grad_norm": 5.698736667633057,
688
- "kl": 0.234375,
689
- "learning_rate": 9.5793991416309e-07,
690
- "loss": 0.0234,
691
- "reward": 0.21875,
692
- "reward_mean": 0.21875,
693
- "reward_std": 0.3471629321575165,
694
- "rewards/accuracy_reward": 0.21875,
695
- "step": 49
696
- },
697
- {
698
- "advantages": 7.450580596923828e-09,
699
- "completion_length": 157.8125,
700
- "epoch": 0.04291845493562232,
701
- "grad_norm": 5.776604652404785,
702
- "kl": 0.1474609375,
703
- "learning_rate": 9.570815450643777e-07,
704
- "loss": 0.0148,
705
- "reward": 0.3125,
706
- "reward_mean": 0.3125,
707
- "reward_std": 0.3745020925998688,
708
- "rewards/accuracy_reward": 0.3125,
709
- "step": 50
710
- },
711
- {
712
- "advantages": 2.2351741790771484e-08,
713
- "completion_length": 148.78125,
714
- "epoch": 0.04377682403433476,
715
- "grad_norm": 4.41421365737915,
716
- "kl": 0.169921875,
717
- "learning_rate": 9.562231759656652e-07,
718
- "loss": 0.0169,
719
- "reward": 0.21875,
720
- "reward_mean": 0.21875,
721
- "reward_std": 0.3608423173427582,
722
- "rewards/accuracy_reward": 0.21875,
723
- "step": 51
724
- },
725
- {
726
- "advantages": 1.1175870895385742e-08,
727
- "completion_length": 146.90625,
728
- "epoch": 0.04463519313304721,
729
- "grad_norm": 4.667245864868164,
730
- "kl": 0.189453125,
731
- "learning_rate": 9.553648068669528e-07,
732
- "loss": 0.0189,
733
- "reward": 0.21875,
734
- "reward_mean": 0.21875,
735
- "reward_std": 0.3377464711666107,
736
- "rewards/accuracy_reward": 0.21875,
737
- "step": 52
738
- },
739
- {
740
- "advantages": -3.725290298461914e-09,
741
- "completion_length": 158.625,
742
- "epoch": 0.045493562231759654,
743
- "grad_norm": 4.415360927581787,
744
- "kl": 0.1259765625,
745
- "learning_rate": 9.545064377682403e-07,
746
- "loss": 0.0126,
747
- "reward": 0.46875,
748
- "reward_mean": 0.46875,
749
- "reward_std": 0.3808925747871399,
750
- "rewards/accuracy_reward": 0.46875,
751
- "step": 53
752
- },
753
- {
754
- "advantages": 1.4901161193847656e-08,
755
- "completion_length": 155.65625,
756
- "epoch": 0.0463519313304721,
757
- "grad_norm": 12.489053726196289,
758
- "kl": 0.37109375,
759
- "learning_rate": 9.536480686695278e-07,
760
- "loss": 0.0371,
761
- "reward": 0.125,
762
- "reward_mean": 0.125,
763
- "reward_std": 0.2925041913986206,
764
- "rewards/accuracy_reward": 0.125,
765
- "step": 54
766
- },
767
- {
768
- "advantages": 2.9802322387695312e-08,
769
- "completion_length": 172.9375,
770
- "epoch": 0.04721030042918455,
771
- "grad_norm": 5.111673831939697,
772
- "kl": 0.173828125,
773
- "learning_rate": 9.527896995708154e-07,
774
- "loss": 0.0174,
775
- "reward": 0.25,
776
- "reward_mean": 0.25,
777
- "reward_std": 0.4629100561141968,
778
- "rewards/accuracy_reward": 0.25,
779
- "step": 55
780
- },
781
- {
782
- "advantages": 2.9802322387695312e-08,
783
- "completion_length": 153.4375,
784
- "epoch": 0.048068669527897,
785
- "grad_norm": 149.73927307128906,
786
- "kl": 2.0,
787
- "learning_rate": 9.519313304721029e-07,
788
- "loss": 0.2008,
789
- "reward": 0.28125,
790
- "reward_mean": 0.28125,
791
- "reward_std": 0.4628904461860657,
792
- "rewards/accuracy_reward": 0.28125,
793
- "step": 56
794
- },
795
- {
796
- "advantages": 1.4901161193847656e-08,
797
- "completion_length": 133.0625,
798
- "epoch": 0.048927038626609444,
799
- "grad_norm": 13.399458885192871,
800
- "kl": 0.37109375,
801
- "learning_rate": 9.510729613733906e-07,
802
- "loss": 0.0371,
803
- "reward": 0.15625,
804
- "reward_mean": 0.15625,
805
- "reward_std": 0.3061639964580536,
806
- "rewards/accuracy_reward": 0.15625,
807
- "step": 57
808
- },
809
- {
810
- "advantages": 1.6763806343078613e-08,
811
- "completion_length": 163.125,
812
- "epoch": 0.04978540772532189,
813
- "grad_norm": 6.9010210037231445,
814
- "kl": 0.294921875,
815
- "learning_rate": 9.502145922746781e-07,
816
- "loss": 0.0295,
817
- "reward": 0.15625,
818
- "reward_mean": 0.15625,
819
- "reward_std": 0.3061639964580536,
820
- "rewards/accuracy_reward": 0.15625,
821
- "step": 58
822
- },
823
- {
824
- "advantages": 1.6763806343078613e-08,
825
- "completion_length": 146.90625,
826
- "epoch": 0.050643776824034335,
827
- "grad_norm": 6.625538349151611,
828
- "kl": 0.166015625,
829
- "learning_rate": 9.493562231759657e-07,
830
- "loss": 0.0166,
831
- "reward": 0.28125,
832
- "reward_mean": 0.28125,
833
- "reward_std": 0.4397946000099182,
834
- "rewards/accuracy_reward": 0.28125,
835
- "step": 59
836
- },
837
- {
838
- "advantages": 3.725290298461914e-09,
839
- "completion_length": 151.03125,
840
- "epoch": 0.05150214592274678,
841
- "grad_norm": 9.018912315368652,
842
- "kl": 0.169921875,
843
- "learning_rate": 9.484978540772532e-07,
844
- "loss": 0.017,
845
- "reward": 0.28125,
846
- "reward_mean": 0.28125,
847
- "reward_std": 0.3471629321575165,
848
- "rewards/accuracy_reward": 0.28125,
849
- "step": 60
850
- },
851
- {
852
- "advantages": 1.30385160446167e-08,
853
- "completion_length": 121.25,
854
- "epoch": 0.05236051502145923,
855
- "grad_norm": 24.219348907470703,
856
- "kl": 0.47265625,
857
- "learning_rate": 9.476394849785408e-07,
858
- "loss": 0.0473,
859
- "reward": 0.125,
860
- "reward_mean": 0.125,
861
- "reward_std": 0.2177756428718567,
862
- "rewards/accuracy_reward": 0.125,
863
- "step": 61
864
- },
865
- {
866
- "advantages": 1.4901161193847656e-08,
867
- "completion_length": 142.5625,
868
- "epoch": 0.05321888412017167,
869
- "grad_norm": 5.040563106536865,
870
- "kl": 0.201171875,
871
- "learning_rate": 9.467811158798282e-07,
872
- "loss": 0.0201,
873
- "reward": 0.125,
874
- "reward_mean": 0.125,
875
- "reward_std": 0.2925041913986206,
876
- "rewards/accuracy_reward": 0.125,
877
- "step": 62
878
- },
879
- {
880
- "advantages": -1.862645149230957e-09,
881
- "completion_length": 145.53125,
882
- "epoch": 0.05407725321888412,
883
- "grad_norm": 9.56372356414795,
884
- "kl": 0.279296875,
885
- "learning_rate": 9.459227467811158e-07,
886
- "loss": 0.0279,
887
- "reward": 0.4375,
888
- "reward_mean": 0.4375,
889
- "reward_std": 0.49022960662841797,
890
- "rewards/accuracy_reward": 0.4375,
891
- "step": 63
892
- },
893
- {
894
- "advantages": 7.450580596923828e-09,
895
- "completion_length": 110.75,
896
- "epoch": 0.054935622317596564,
897
- "grad_norm": 5.647745132446289,
898
- "kl": 0.1884765625,
899
- "learning_rate": 9.450643776824034e-07,
900
- "loss": 0.0189,
901
- "reward": 0.3125,
902
- "reward_mean": 0.3125,
903
- "reward_std": 0.4082317352294922,
904
- "rewards/accuracy_reward": 0.3125,
905
- "step": 64
906
- },
907
- {
908
- "advantages": -1.30385160446167e-08,
909
- "completion_length": 152.0,
910
- "epoch": 0.055793991416309016,
911
- "grad_norm": 5.315371513366699,
912
- "kl": 0.17578125,
913
- "learning_rate": 9.442060085836909e-07,
914
- "loss": 0.0176,
915
- "reward": 0.46875,
916
- "reward_mean": 0.46875,
917
- "reward_std": 0.4807935357093811,
918
- "rewards/accuracy_reward": 0.46875,
919
- "step": 65
920
- },
921
- {
922
- "advantages": 1.6763806343078613e-08,
923
- "completion_length": 117.6875,
924
- "epoch": 0.05665236051502146,
925
- "grad_norm": 4.9794840812683105,
926
- "kl": 0.2275390625,
927
- "learning_rate": 9.433476394849785e-07,
928
- "loss": 0.0228,
929
- "reward": 0.28125,
930
- "reward_mean": 0.28125,
931
- "reward_std": 0.378745436668396,
932
- "rewards/accuracy_reward": 0.28125,
933
- "step": 66
934
- },
935
- {
936
- "advantages": 2.9802322387695312e-08,
937
- "completion_length": 140.15625,
938
- "epoch": 0.05751072961373391,
939
- "grad_norm": 4.917365074157715,
940
- "kl": 0.171875,
941
- "learning_rate": 9.42489270386266e-07,
942
- "loss": 0.0172,
943
- "reward": 0.28125,
944
- "reward_mean": 0.28125,
945
- "reward_std": 0.4628904461860657,
946
- "rewards/accuracy_reward": 0.28125,
947
- "step": 67
948
- },
949
- {
950
- "advantages": 1.1175870895385742e-08,
951
- "completion_length": 134.09375,
952
- "epoch": 0.05836909871244635,
953
- "grad_norm": 4.92598819732666,
954
- "kl": 0.1640625,
955
- "learning_rate": 9.416309012875536e-07,
956
- "loss": 0.0164,
957
- "reward": 0.34375,
958
- "reward_mean": 0.34375,
959
- "reward_std": 0.4628904461860657,
960
- "rewards/accuracy_reward": 0.34375,
961
- "step": 68
962
- },
963
- {
964
- "advantages": 1.4901161193847656e-08,
965
- "completion_length": 134.0625,
966
- "epoch": 0.0592274678111588,
967
- "grad_norm": 11.736459732055664,
968
- "kl": 0.44921875,
969
- "learning_rate": 9.407725321888411e-07,
970
- "loss": 0.0449,
971
- "reward": 0.25,
972
- "reward_mean": 0.25,
973
- "reward_std": 0.3650856614112854,
974
- "rewards/accuracy_reward": 0.25,
975
- "step": 69
976
- },
977
- {
978
- "advantages": 2.2351741790771484e-08,
979
- "completion_length": 131.5,
980
- "epoch": 0.060085836909871244,
981
- "grad_norm": 21.64668083190918,
982
- "kl": 0.47265625,
983
- "learning_rate": 9.399141630901288e-07,
984
- "loss": 0.0474,
985
- "reward": 0.21875,
986
- "reward_mean": 0.21875,
987
- "reward_std": 0.4218915104866028,
988
- "rewards/accuracy_reward": 0.21875,
989
- "step": 70
990
- },
991
- {
992
- "advantages": 2.0489096641540527e-08,
993
- "completion_length": 137.1875,
994
- "epoch": 0.06094420600858369,
995
- "grad_norm": 4.1953444480896,
996
- "kl": 0.1826171875,
997
- "learning_rate": 9.390557939914163e-07,
998
- "loss": 0.0182,
999
- "reward": 0.1875,
1000
- "reward_mean": 0.1875,
1001
- "reward_std": 0.3335031569004059,
1002
- "rewards/accuracy_reward": 0.1875,
1003
- "step": 71
1004
- },
1005
- {
1006
- "advantages": 3.725290298461914e-09,
1007
- "completion_length": 138.46875,
1008
- "epoch": 0.061802575107296136,
1009
- "grad_norm": 54.577999114990234,
1010
- "kl": 0.73828125,
1011
- "learning_rate": 9.381974248927038e-07,
1012
- "loss": 0.0742,
1013
- "reward": 0.40625,
1014
- "reward_mean": 0.40625,
1015
- "reward_std": 0.4807935357093811,
1016
- "rewards/accuracy_reward": 0.40625,
1017
- "step": 72
1018
- },
1019
- {
1020
- "advantages": 0.0,
1021
- "completion_length": 148.0,
1022
- "epoch": 0.06266094420600858,
1023
- "grad_norm": 24.432819366455078,
1024
- "kl": 0.1826171875,
1025
- "learning_rate": 9.373390557939914e-07,
1026
- "loss": 0.0183,
1027
- "reward": 0.25,
1028
- "reward_mean": 0.25,
1029
- "reward_std": 0.2587745785713196,
1030
- "rewards/accuracy_reward": 0.25,
1031
- "step": 73
1032
- },
1033
- {
1034
- "advantages": 2.421438694000244e-08,
1035
- "completion_length": 145.84375,
1036
- "epoch": 0.06351931330472103,
1037
- "grad_norm": 5.361104965209961,
1038
- "kl": 0.150390625,
1039
- "learning_rate": 9.364806866952789e-07,
1040
- "loss": 0.015,
1041
- "reward": 0.21875,
1042
- "reward_mean": 0.21875,
1043
- "reward_std": 0.4218915104866028,
1044
- "rewards/accuracy_reward": 0.21875,
1045
- "step": 74
1046
- },
1047
- {
1048
- "advantages": 1.862645149230957e-08,
1049
- "completion_length": 148.625,
1050
- "epoch": 0.06437768240343347,
1051
- "grad_norm": 4.662086009979248,
1052
- "kl": 0.1630859375,
1053
- "learning_rate": 9.356223175965665e-07,
1054
- "loss": 0.0163,
1055
- "reward": 0.28125,
1056
- "reward_mean": 0.28125,
1057
- "reward_std": 0.4534739851951599,
1058
- "rewards/accuracy_reward": 0.28125,
1059
- "step": 75
1060
- },
1061
- {
1062
- "advantages": 1.30385160446167e-08,
1063
- "completion_length": 138.375,
1064
- "epoch": 0.06523605150214593,
1065
- "grad_norm": 17.23464012145996,
1066
- "kl": 0.404296875,
1067
- "learning_rate": 9.34763948497854e-07,
1068
- "loss": 0.0406,
1069
- "reward": 0.125,
1070
- "reward_mean": 0.125,
1071
- "reward_std": 0.2177756428718567,
1072
- "rewards/accuracy_reward": 0.125,
1073
- "step": 76
1074
- },
1075
- {
1076
- "advantages": 3.166496753692627e-08,
1077
- "completion_length": 161.4375,
1078
- "epoch": 0.06609442060085836,
1079
- "grad_norm": 4.420433521270752,
1080
- "kl": 0.162109375,
1081
- "learning_rate": 9.339055793991416e-07,
1082
- "loss": 0.0162,
1083
- "reward": 0.3125,
1084
- "reward_mean": 0.3125,
1085
- "reward_std": 0.49022960662841797,
1086
- "rewards/accuracy_reward": 0.3125,
1087
- "step": 77
1088
- },
1089
- {
1090
- "advantages": 2.2351741790771484e-08,
1091
- "completion_length": 145.125,
1092
- "epoch": 0.06695278969957082,
1093
- "grad_norm": 11.806068420410156,
1094
- "kl": 0.294921875,
1095
- "learning_rate": 9.330472103004291e-07,
1096
- "loss": 0.0294,
1097
- "reward": 0.1875,
1098
- "reward_mean": 0.1875,
1099
- "reward_std": 0.4082317352294922,
1100
- "rewards/accuracy_reward": 0.1875,
1101
- "step": 78
1102
- },
1103
- {
1104
- "advantages": 9.313225746154785e-09,
1105
- "completion_length": 146.21875,
1106
- "epoch": 0.06781115879828326,
1107
- "grad_norm": 5.149102210998535,
1108
- "kl": 0.201171875,
1109
- "learning_rate": 9.321888412017167e-07,
1110
- "loss": 0.0201,
1111
- "reward": 0.34375,
1112
- "reward_mean": 0.34375,
1113
- "reward_std": 0.3966485261917114,
1114
- "rewards/accuracy_reward": 0.34375,
1115
- "step": 79
1116
- },
1117
- {
1118
- "advantages": 1.6763806343078613e-08,
1119
- "completion_length": 133.0,
1120
- "epoch": 0.06866952789699571,
1121
- "grad_norm": 4.339179992675781,
1122
- "kl": 0.232421875,
1123
- "learning_rate": 9.313304721030042e-07,
1124
- "loss": 0.0233,
1125
- "reward": 0.15625,
1126
- "reward_mean": 0.15625,
1127
- "reward_std": 0.24511480331420898,
1128
- "rewards/accuracy_reward": 0.15625,
1129
- "step": 80
1130
- },
1131
- {
1132
- "advantages": 7.450580596923828e-09,
1133
- "completion_length": 150.8125,
1134
- "epoch": 0.06952789699570816,
1135
- "grad_norm": 7.417867183685303,
1136
- "kl": 0.166015625,
1137
- "learning_rate": 9.304721030042918e-07,
1138
- "loss": 0.0166,
1139
- "reward": 0.28125,
1140
- "reward_mean": 0.28125,
1141
- "reward_std": 0.3608423173427582,
1142
- "rewards/accuracy_reward": 0.28125,
1143
- "step": 81
1144
- },
1145
- {
1146
- "advantages": 1.862645149230957e-08,
1147
- "completion_length": 150.1875,
1148
- "epoch": 0.0703862660944206,
1149
- "grad_norm": 4.655648708343506,
1150
- "kl": 0.1748046875,
1151
- "learning_rate": 9.296137339055793e-07,
1152
- "loss": 0.0175,
1153
- "reward": 0.15625,
1154
- "reward_mean": 0.15625,
1155
- "reward_std": 0.3198433816432953,
1156
- "rewards/accuracy_reward": 0.15625,
1157
- "step": 82
1158
- },
1159
- {
1160
- "advantages": 1.30385160446167e-08,
1161
- "completion_length": 144.1875,
1162
- "epoch": 0.07124463519313305,
1163
- "grad_norm": 6.301512241363525,
1164
- "kl": 0.140625,
1165
- "learning_rate": 9.287553648068669e-07,
1166
- "loss": 0.014,
1167
- "reward": 0.375,
1168
- "reward_mean": 0.375,
1169
- "reward_std": 0.48503684997558594,
1170
- "rewards/accuracy_reward": 0.375,
1171
- "step": 83
1172
- },
1173
- {
1174
- "advantages": 7.450580596923828e-09,
1175
- "completion_length": 155.75,
1176
- "epoch": 0.07210300429184549,
1177
- "grad_norm": 4.552245616912842,
1178
- "kl": 0.1494140625,
1179
- "learning_rate": 9.278969957081545e-07,
1180
- "loss": 0.015,
1181
- "reward": 0.46875,
1182
- "reward_mean": 0.46875,
1183
- "reward_std": 0.5302791595458984,
1184
- "rewards/accuracy_reward": 0.46875,
1185
- "step": 84
1186
- },
1187
- {
1188
- "advantages": 1.862645149230957e-08,
1189
- "completion_length": 163.59375,
1190
- "epoch": 0.07296137339055794,
1191
- "grad_norm": 5.0817060470581055,
1192
- "kl": 0.1953125,
1193
- "learning_rate": 9.27038626609442e-07,
1194
- "loss": 0.0195,
1195
- "reward": 0.28125,
1196
- "reward_mean": 0.28125,
1197
- "reward_std": 0.4534739851951599,
1198
- "rewards/accuracy_reward": 0.28125,
1199
- "step": 85
1200
- },
1201
- {
1202
- "advantages": 7.450580596923828e-09,
1203
- "completion_length": 167.5,
1204
- "epoch": 0.07381974248927038,
1205
- "grad_norm": 3.2791318893432617,
1206
- "kl": 0.16796875,
1207
- "learning_rate": 9.261802575107296e-07,
1208
- "loss": 0.0168,
1209
- "reward": 0.0625,
1210
- "reward_mean": 0.0625,
1211
- "reward_std": 0.1767766922712326,
1212
- "rewards/accuracy_reward": 0.0625,
1213
- "step": 86
1214
- },
1215
- {
1216
- "advantages": 5.587935447692871e-09,
1217
- "completion_length": 152.03125,
1218
- "epoch": 0.07467811158798283,
1219
- "grad_norm": 5.571934700012207,
1220
- "kl": 0.2421875,
1221
- "learning_rate": 9.253218884120171e-07,
1222
- "loss": 0.0242,
1223
- "reward": 0.28125,
1224
- "reward_mean": 0.28125,
1225
- "reward_std": 0.3608423173427582,
1226
- "rewards/accuracy_reward": 0.28125,
1227
- "step": 87
1228
- },
1229
- {
1230
- "advantages": 1.862645149230957e-08,
1231
- "completion_length": 171.15625,
1232
- "epoch": 0.07553648068669527,
1233
- "grad_norm": 4.619121074676514,
1234
- "kl": 0.2109375,
1235
- "learning_rate": 9.244635193133047e-07,
1236
- "loss": 0.0211,
1237
- "reward": 0.15625,
1238
- "reward_mean": 0.15625,
1239
- "reward_std": 0.3808925747871399,
1240
- "rewards/accuracy_reward": 0.15625,
1241
- "step": 88
1242
- },
1243
- {
1244
- "advantages": 3.725290298461914e-09,
1245
- "completion_length": 153.03125,
1246
- "epoch": 0.07639484978540773,
1247
- "grad_norm": 4.043124198913574,
1248
- "kl": 0.13671875,
1249
- "learning_rate": 9.236051502145923e-07,
1250
- "loss": 0.0137,
1251
- "reward": 0.28125,
1252
- "reward_mean": 0.28125,
1253
- "reward_std": 0.3471629321575165,
1254
- "rewards/accuracy_reward": 0.28125,
1255
- "step": 89
1256
- },
1257
- {
1258
- "advantages": 3.725290298461914e-09,
1259
- "completion_length": 147.53125,
1260
- "epoch": 0.07725321888412018,
1261
- "grad_norm": 2.962092638015747,
1262
- "kl": 0.1513671875,
1263
- "learning_rate": 9.227467811158798e-07,
1264
- "loss": 0.0152,
1265
- "reward": 0.15625,
1266
- "reward_mean": 0.15625,
1267
- "reward_std": 0.22201895713806152,
1268
- "rewards/accuracy_reward": 0.15625,
1269
- "step": 90
1270
- },
1271
- {
1272
- "advantages": 3.3527612686157227e-08,
1273
- "completion_length": 176.9375,
1274
- "epoch": 0.07811158798283262,
1275
- "grad_norm": 5.911281585693359,
1276
- "kl": 0.1484375,
1277
- "learning_rate": 9.218884120171674e-07,
1278
- "loss": 0.0148,
1279
- "reward": 0.3125,
1280
- "reward_mean": 0.3125,
1281
- "reward_std": 0.49022960662841797,
1282
- "rewards/accuracy_reward": 0.3125,
1283
- "step": 91
1284
- },
1285
- {
1286
- "advantages": 0.0,
1287
- "completion_length": 160.875,
1288
- "epoch": 0.07896995708154507,
1289
- "grad_norm": 3.9937198162078857,
1290
- "kl": 0.173828125,
1291
- "learning_rate": 9.210300429184548e-07,
1292
- "loss": 0.0174,
1293
- "reward": 0.25,
1294
- "reward_mean": 0.25,
1295
- "reward_std": 0.2925041913986206,
1296
- "rewards/accuracy_reward": 0.25,
1297
- "step": 92
1298
- },
1299
- {
1300
- "advantages": 1.1175870895385742e-08,
1301
- "completion_length": 167.0,
1302
- "epoch": 0.07982832618025751,
1303
- "grad_norm": 3.8715927600860596,
1304
- "kl": 0.134765625,
1305
- "learning_rate": 9.201716738197424e-07,
1306
- "loss": 0.0135,
1307
- "reward": 0.25,
1308
- "reward_mean": 0.25,
1309
- "reward_std": 0.3514062464237213,
1310
- "rewards/accuracy_reward": 0.25,
1311
- "step": 93
1312
- },
1313
- {
1314
- "advantages": 3.166496753692627e-08,
1315
- "completion_length": 155.53125,
1316
- "epoch": 0.08068669527896996,
1317
- "grad_norm": 4.316589832305908,
1318
- "kl": 0.1640625,
1319
- "learning_rate": 9.193133047210299e-07,
1320
- "loss": 0.0164,
1321
- "reward": 0.28125,
1322
- "reward_mean": 0.28125,
1323
- "reward_std": 0.4765698313713074,
1324
- "rewards/accuracy_reward": 0.28125,
1325
- "step": 94
1326
- },
1327
- {
1328
- "advantages": 2.60770320892334e-08,
1329
- "completion_length": 158.15625,
1330
- "epoch": 0.0815450643776824,
1331
- "grad_norm": 4.352808952331543,
1332
- "kl": 0.1416015625,
1333
- "learning_rate": 9.184549356223176e-07,
1334
- "loss": 0.0142,
1335
- "reward": 0.25,
1336
- "reward_mean": 0.25,
1337
- "reward_std": 0.4355512857437134,
1338
- "rewards/accuracy_reward": 0.25,
1339
- "step": 95
1340
- },
1341
- {
1342
- "advantages": 1.6763806343078613e-08,
1343
- "completion_length": 142.0625,
1344
- "epoch": 0.08240343347639485,
1345
- "grad_norm": 6.171362400054932,
1346
- "kl": 0.1787109375,
1347
- "learning_rate": 9.175965665236051e-07,
1348
- "loss": 0.0178,
1349
- "reward": 0.15625,
1350
- "reward_mean": 0.15625,
1351
- "reward_std": 0.3061639666557312,
1352
- "rewards/accuracy_reward": 0.15625,
1353
- "step": 96
1354
- },
1355
- {
1356
- "advantages": 1.30385160446167e-08,
1357
- "completion_length": 163.78125,
1358
- "epoch": 0.08326180257510729,
1359
- "grad_norm": 3.2692599296569824,
1360
- "kl": 0.1796875,
1361
- "learning_rate": 9.167381974248927e-07,
1362
- "loss": 0.0179,
1363
- "reward": 0.125,
1364
- "reward_mean": 0.125,
1365
- "reward_std": 0.2177756428718567,
1366
- "rewards/accuracy_reward": 0.125,
1367
- "step": 97
1368
- },
1369
- {
1370
- "advantages": 1.30385160446167e-08,
1371
- "completion_length": 158.0625,
1372
- "epoch": 0.08412017167381974,
1373
- "grad_norm": 3.954564094543457,
1374
- "kl": 0.123046875,
1375
- "learning_rate": 9.158798283261803e-07,
1376
- "loss": 0.0123,
1377
- "reward": 0.25,
1378
- "reward_mean": 0.25,
1379
- "reward_std": 0.3514062464237213,
1380
- "rewards/accuracy_reward": 0.25,
1381
- "step": 98
1382
- },
1383
- {
1384
- "advantages": 0.0,
1385
- "completion_length": 140.09375,
1386
- "epoch": 0.08497854077253218,
1387
- "grad_norm": 4.733702659606934,
1388
- "kl": 0.166015625,
1389
- "learning_rate": 9.150214592274678e-07,
1390
- "loss": 0.0166,
1391
- "reward": 0.25,
1392
- "reward_mean": 0.25,
1393
- "reward_std": 0.292504221200943,
1394
- "rewards/accuracy_reward": 0.25,
1395
- "step": 99
1396
- },
1397
- {
1398
- "advantages": 1.6763806343078613e-08,
1399
- "completion_length": 139.0625,
1400
- "epoch": 0.08583690987124463,
1401
- "grad_norm": 4.053472995758057,
1402
- "kl": 0.15625,
1403
- "learning_rate": 9.141630901287554e-07,
1404
- "loss": 0.0157,
1405
- "reward": 0.15625,
1406
- "reward_mean": 0.15625,
1407
- "reward_std": 0.3061639666557312,
1408
- "rewards/accuracy_reward": 0.15625,
1409
- "step": 100
1410
- },
1411
- {
1412
- "advantages": 2.421438694000244e-08,
1413
- "completion_length": 135.96875,
1414
- "epoch": 0.08669527896995709,
1415
- "grad_norm": 5.235788822174072,
1416
- "kl": 0.18359375,
1417
- "learning_rate": 9.133047210300429e-07,
1418
- "loss": 0.0184,
1419
- "reward": 0.34375,
1420
- "reward_mean": 0.34375,
1421
- "reward_std": 0.4944729208946228,
1422
- "rewards/accuracy_reward": 0.34375,
1423
- "step": 101
1424
- },
1425
- {
1426
- "advantages": 1.862645149230957e-09,
1427
- "completion_length": 164.1875,
1428
- "epoch": 0.08755364806866953,
1429
- "grad_norm": 4.011264801025391,
1430
- "kl": 0.158203125,
1431
- "learning_rate": 9.124463519313305e-07,
1432
- "loss": 0.0158,
1433
- "reward": 0.25,
1434
- "reward_mean": 0.25,
1435
- "reward_std": 0.3335031569004059,
1436
- "rewards/accuracy_reward": 0.25,
1437
- "step": 102
1438
- },
1439
- {
1440
- "advantages": 1.30385160446167e-08,
1441
- "completion_length": 141.59375,
1442
- "epoch": 0.08841201716738198,
1443
- "grad_norm": 4.756768226623535,
1444
- "kl": 0.12109375,
1445
- "learning_rate": 9.115879828326179e-07,
1446
- "loss": 0.0121,
1447
- "reward": 0.375,
1448
- "reward_mean": 0.375,
1449
- "reward_std": 0.4492306709289551,
1450
- "rewards/accuracy_reward": 0.375,
1451
- "step": 103
1452
- },
1453
- {
1454
- "advantages": 2.9802322387695312e-08,
1455
- "completion_length": 157.4375,
1456
- "epoch": 0.08927038626609442,
1457
- "grad_norm": 4.457541465759277,
1458
- "kl": 0.1767578125,
1459
- "learning_rate": 9.107296137339055e-07,
1460
- "loss": 0.0177,
1461
- "reward": 0.28125,
1462
- "reward_mean": 0.28125,
1463
- "reward_std": 0.4628904461860657,
1464
- "rewards/accuracy_reward": 0.28125,
1465
- "step": 104
1466
- },
1467
- {
1468
- "advantages": 1.4901161193847656e-08,
1469
- "completion_length": 174.65625,
1470
- "epoch": 0.09012875536480687,
1471
- "grad_norm": 3.6339664459228516,
1472
- "kl": 0.1484375,
1473
- "learning_rate": 9.09871244635193e-07,
1474
- "loss": 0.0148,
1475
- "reward": 0.125,
1476
- "reward_mean": 0.125,
1477
- "reward_std": 0.292504221200943,
1478
- "rewards/accuracy_reward": 0.125,
1479
- "step": 105
1480
- },
1481
- {
1482
- "advantages": 2.0489096641540527e-08,
1483
- "completion_length": 160.625,
1484
- "epoch": 0.09098712446351931,
1485
- "grad_norm": 5.5795159339904785,
1486
- "kl": 0.1474609375,
1487
- "learning_rate": 9.090128755364806e-07,
1488
- "loss": 0.0147,
1489
- "reward": 0.1875,
1490
- "reward_mean": 0.1875,
1491
- "reward_std": 0.3945523500442505,
1492
- "rewards/accuracy_reward": 0.1875,
1493
- "step": 106
1494
- },
1495
- {
1496
- "advantages": -1.862645149230957e-09,
1497
- "completion_length": 175.09375,
1498
- "epoch": 0.09184549356223176,
1499
- "grad_norm": 4.58608341217041,
1500
- "kl": 0.20703125,
1501
- "learning_rate": 9.081545064377682e-07,
1502
- "loss": 0.0207,
1503
- "reward": 0.21875,
1504
- "reward_mean": 0.21875,
1505
- "reward_std": 0.24511480331420898,
1506
- "rewards/accuracy_reward": 0.21875,
1507
- "step": 107
1508
- },
1509
- {
1510
- "advantages": 1.4901161193847656e-08,
1511
- "completion_length": 164.40625,
1512
- "epoch": 0.0927038626609442,
1513
- "grad_norm": 4.733726978302002,
1514
- "kl": 0.13671875,
1515
- "learning_rate": 9.072961373390558e-07,
1516
- "loss": 0.0137,
1517
- "reward": 0.25,
1518
- "reward_mean": 0.25,
1519
- "reward_std": 0.4261348247528076,
1520
- "rewards/accuracy_reward": 0.25,
1521
- "step": 108
1522
- },
1523
- {
1524
- "advantages": 1.30385160446167e-08,
1525
- "completion_length": 137.53125,
1526
- "epoch": 0.09356223175965665,
1527
- "grad_norm": 4.860609531402588,
1528
- "kl": 0.1376953125,
1529
- "learning_rate": 9.064377682403434e-07,
1530
- "loss": 0.0138,
1531
- "reward": 0.375,
1532
- "reward_mean": 0.375,
1533
- "reward_std": 0.4492306709289551,
1534
- "rewards/accuracy_reward": 0.375,
1535
- "step": 109
1536
- },
1537
- {
1538
- "advantages": 2.60770320892334e-08,
1539
- "completion_length": 143.1875,
1540
- "epoch": 0.0944206008583691,
1541
- "grad_norm": 5.030947685241699,
1542
- "kl": 0.216796875,
1543
- "learning_rate": 9.055793991416309e-07,
1544
- "loss": 0.0217,
1545
- "reward": 0.25,
1546
- "reward_mean": 0.25,
1547
- "reward_std": 0.4492306709289551,
1548
- "rewards/accuracy_reward": 0.25,
1549
- "step": 110
1550
- },
1551
- {
1552
- "advantages": 2.0489096641540527e-08,
1553
- "completion_length": 154.6875,
1554
- "epoch": 0.09527896995708154,
1555
- "grad_norm": 5.2177534103393555,
1556
- "kl": 0.138671875,
1557
- "learning_rate": 9.047210300429185e-07,
1558
- "loss": 0.0139,
1559
- "reward": 0.4375,
1560
- "reward_mean": 0.4375,
1561
- "reward_std": 0.3335031569004059,
1562
- "rewards/accuracy_reward": 0.4375,
1563
- "step": 111
1564
- },
1565
- {
1566
- "advantages": 9.313225746154785e-09,
1567
- "completion_length": 127.625,
1568
- "epoch": 0.096137339055794,
1569
- "grad_norm": 5.624181747436523,
1570
- "kl": 0.1787109375,
1571
- "learning_rate": 9.03862660944206e-07,
1572
- "loss": 0.0179,
1573
- "reward": 0.46875,
1574
- "reward_mean": 0.46875,
1575
- "reward_std": 0.5302791595458984,
1576
- "rewards/accuracy_reward": 0.46875,
1577
- "step": 112
1578
- },
1579
- {
1580
- "advantages": 7.450580596923828e-09,
1581
- "completion_length": 153.84375,
1582
- "epoch": 0.09699570815450644,
1583
- "grad_norm": 3.743622303009033,
1584
- "kl": 0.1396484375,
1585
- "learning_rate": 9.030042918454935e-07,
1586
- "loss": 0.0139,
1587
- "reward": 0.1875,
1588
- "reward_mean": 0.1875,
1589
- "reward_std": 0.3104073107242584,
1590
- "rewards/accuracy_reward": 0.1875,
1591
- "step": 113
1592
- },
1593
- {
1594
- "advantages": -1.1175870895385742e-08,
1595
- "completion_length": 162.5625,
1596
- "epoch": 0.09785407725321889,
1597
- "grad_norm": 4.363542556762695,
1598
- "kl": 0.181640625,
1599
- "learning_rate": 9.02145922746781e-07,
1600
- "loss": 0.0182,
1601
- "reward": 0.375,
1602
- "reward_mean": 0.375,
1603
- "reward_std": 0.3745020925998688,
1604
- "rewards/accuracy_reward": 0.375,
1605
- "step": 114
1606
- },
1607
- {
1608
- "advantages": 7.450580596923828e-09,
1609
- "completion_length": 154.3125,
1610
- "epoch": 0.09871244635193133,
1611
- "grad_norm": 5.912485599517822,
1612
- "kl": 0.169921875,
1613
- "learning_rate": 9.012875536480686e-07,
1614
- "loss": 0.017,
1615
- "reward": 0.34375,
1616
- "reward_mean": 0.34375,
1617
- "reward_std": 0.3608423173427582,
1618
- "rewards/accuracy_reward": 0.34375,
1619
- "step": 115
1620
- },
1621
- {
1622
- "advantages": 1.4901161193847656e-08,
1623
- "completion_length": 153.03125,
1624
- "epoch": 0.09957081545064378,
1625
- "grad_norm": 4.207987308502197,
1626
- "kl": 0.11962890625,
1627
- "learning_rate": 9.004291845493562e-07,
1628
- "loss": 0.0119,
1629
- "reward": 0.25,
1630
- "reward_mean": 0.25,
1631
- "reward_std": 0.4261348247528076,
1632
- "rewards/accuracy_reward": 0.25,
1633
- "step": 116
1634
- },
1635
- {
1636
- "advantages": -5.587935447692871e-09,
1637
- "completion_length": 149.09375,
1638
- "epoch": 0.10042918454935622,
1639
- "grad_norm": 4.171358108520508,
1640
- "kl": 0.181640625,
1641
- "learning_rate": 8.995708154506437e-07,
1642
- "loss": 0.0181,
1643
- "reward": 0.1875,
1644
- "reward_mean": 0.1875,
1645
- "reward_std": 0.2177756428718567,
1646
- "rewards/accuracy_reward": 0.1875,
1647
- "step": 117
1648
- },
1649
- {
1650
- "advantages": 1.6763806343078613e-08,
1651
- "completion_length": 135.25,
1652
- "epoch": 0.10128755364806867,
1653
- "grad_norm": 3.7221720218658447,
1654
- "kl": 0.1357421875,
1655
- "learning_rate": 8.987124463519313e-07,
1656
- "loss": 0.0136,
1657
- "reward": 0.15625,
1658
- "reward_mean": 0.15625,
1659
- "reward_std": 0.3061639964580536,
1660
- "rewards/accuracy_reward": 0.15625,
1661
- "step": 118
1662
- },
1663
- {
1664
- "advantages": 1.4901161193847656e-08,
1665
- "completion_length": 154.40625,
1666
- "epoch": 0.10214592274678111,
1667
- "grad_norm": 7.550022602081299,
1668
- "kl": 0.318359375,
1669
- "learning_rate": 8.978540772532188e-07,
1670
- "loss": 0.0318,
1671
- "reward": 0.25,
1672
- "reward_mean": 0.25,
1673
- "reward_std": 0.3650856614112854,
1674
- "rewards/accuracy_reward": 0.25,
1675
- "step": 119
1676
- },
1677
- {
1678
- "advantages": 2.9802322387695312e-08,
1679
- "completion_length": 139.15625,
1680
- "epoch": 0.10300429184549356,
1681
- "grad_norm": 5.675111293792725,
1682
- "kl": 0.1552734375,
1683
- "learning_rate": 8.969957081545064e-07,
1684
- "loss": 0.0155,
1685
- "reward": 0.28125,
1686
- "reward_mean": 0.28125,
1687
- "reward_std": 0.4628904461860657,
1688
- "rewards/accuracy_reward": 0.28125,
1689
- "step": 120
1690
- },
1691
- {
1692
- "advantages": 1.1175870895385742e-08,
1693
- "completion_length": 138.125,
1694
- "epoch": 0.10386266094420601,
1695
- "grad_norm": 4.683244228363037,
1696
- "kl": 0.142578125,
1697
- "learning_rate": 8.96137339055794e-07,
1698
- "loss": 0.0142,
1699
- "reward": 0.34375,
1700
- "reward_mean": 0.34375,
1701
- "reward_std": 0.4628904461860657,
1702
- "rewards/accuracy_reward": 0.34375,
1703
- "step": 121
1704
- },
1705
- {
1706
- "advantages": -1.862645149230957e-09,
1707
- "completion_length": 129.4375,
1708
- "epoch": 0.10472103004291845,
1709
- "grad_norm": 4.976833820343018,
1710
- "kl": 0.1767578125,
1711
- "learning_rate": 8.952789699570816e-07,
1712
- "loss": 0.0177,
1713
- "reward": 0.46875,
1714
- "reward_mean": 0.46875,
1715
- "reward_std": 0.5038893818855286,
1716
- "rewards/accuracy_reward": 0.46875,
1717
- "step": 122
1718
- },
1719
- {
1720
- "advantages": 1.862645149230957e-09,
1721
- "completion_length": 153.03125,
1722
- "epoch": 0.1055793991416309,
1723
- "grad_norm": 4.796186447143555,
1724
- "kl": 0.1201171875,
1725
- "learning_rate": 8.94420600858369e-07,
1726
- "loss": 0.012,
1727
- "reward": 0.375,
1728
- "reward_mean": 0.375,
1729
- "reward_std": 0.4671337604522705,
1730
- "rewards/accuracy_reward": 0.375,
1731
- "step": 123
1732
- },
1733
- {
1734
- "advantages": -1.862645149230957e-09,
1735
- "completion_length": 109.25,
1736
- "epoch": 0.10643776824034334,
1737
- "grad_norm": 4.939033031463623,
1738
- "kl": 0.2236328125,
1739
- "learning_rate": 8.935622317596566e-07,
1740
- "loss": 0.0223,
1741
- "reward": 0.46875,
1742
- "reward_mean": 0.46875,
1743
- "reward_std": 0.3061639964580536,
1744
- "rewards/accuracy_reward": 0.46875,
1745
- "step": 124
1746
- },
1747
- {
1748
- "advantages": 1.30385160446167e-08,
1749
- "completion_length": 131.90625,
1750
- "epoch": 0.1072961373390558,
1751
- "grad_norm": 6.3975114822387695,
1752
- "kl": 0.1953125,
1753
- "learning_rate": 8.927038626609442e-07,
1754
- "loss": 0.0195,
1755
- "reward": 0.375,
1756
- "reward_mean": 0.375,
1757
- "reward_std": 0.48503684997558594,
1758
- "rewards/accuracy_reward": 0.375,
1759
- "step": 125
1760
- },
1761
- {
1762
- "advantages": 1.30385160446167e-08,
1763
- "completion_length": 108.375,
1764
- "epoch": 0.10815450643776824,
1765
- "grad_norm": 5.976772308349609,
1766
- "kl": 0.1650390625,
1767
- "learning_rate": 8.918454935622317e-07,
1768
- "loss": 0.0165,
1769
- "reward": 0.375,
1770
- "reward_mean": 0.375,
1771
- "reward_std": 0.49022960662841797,
1772
- "rewards/accuracy_reward": 0.375,
1773
- "step": 126
1774
- },
1775
- {
1776
- "advantages": 1.4901161193847656e-08,
1777
- "completion_length": 129.25,
1778
- "epoch": 0.10901287553648069,
1779
- "grad_norm": 5.26469087600708,
1780
- "kl": 0.14453125,
1781
- "learning_rate": 8.909871244635193e-07,
1782
- "loss": 0.0144,
1783
- "reward": 0.375,
1784
- "reward_mean": 0.375,
1785
- "reward_std": 0.4629100561141968,
1786
- "rewards/accuracy_reward": 0.375,
1787
- "step": 127
1788
- },
1789
- {
1790
- "advantages": -1.1175870895385742e-08,
1791
- "completion_length": 128.3125,
1792
- "epoch": 0.10987124463519313,
1793
- "grad_norm": 5.042163848876953,
1794
- "kl": 0.1328125,
1795
- "learning_rate": 8.901287553648068e-07,
1796
- "loss": 0.0133,
1797
- "reward": 0.5,
1798
- "reward_mean": 0.5,
1799
- "reward_std": 0.5081326961517334,
1800
- "rewards/accuracy_reward": 0.5,
1801
- "step": 128
1802
- },
1803
- {
1804
- "advantages": 1.862645149230957e-08,
1805
- "completion_length": 150.875,
1806
- "epoch": 0.11072961373390558,
1807
- "grad_norm": 4.708669662475586,
1808
- "kl": 0.12353515625,
1809
- "learning_rate": 8.892703862660944e-07,
1810
- "loss": 0.0124,
1811
- "reward": 0.15625,
1812
- "reward_mean": 0.15625,
1813
- "reward_std": 0.3808925747871399,
1814
- "rewards/accuracy_reward": 0.15625,
1815
- "step": 129
1816
- },
1817
- {
1818
- "advantages": 0.0,
1819
- "completion_length": 120.40625,
1820
- "epoch": 0.11158798283261803,
1821
- "grad_norm": 5.916502952575684,
1822
- "kl": 0.1708984375,
1823
- "learning_rate": 8.884120171673819e-07,
1824
- "loss": 0.0171,
1825
- "reward": 0.34375,
1826
- "reward_mean": 0.34375,
1827
- "reward_std": 0.4397946000099182,
1828
- "rewards/accuracy_reward": 0.34375,
1829
- "step": 130
1830
- },
1831
- {
1832
- "advantages": 1.4901161193847656e-08,
1833
- "completion_length": 136.90625,
1834
- "epoch": 0.11244635193133047,
1835
- "grad_norm": 4.78549861907959,
1836
- "kl": 0.1708984375,
1837
- "learning_rate": 8.875536480686695e-07,
1838
- "loss": 0.0171,
1839
- "reward": 0.40625,
1840
- "reward_mean": 0.40625,
1841
- "reward_std": 0.5038893818855286,
1842
- "rewards/accuracy_reward": 0.40625,
1843
- "step": 131
1844
- },
1845
- {
1846
- "advantages": -9.313225746154785e-09,
1847
- "completion_length": 109.71875,
1848
- "epoch": 0.11330472103004292,
1849
- "grad_norm": 6.360599040985107,
1850
- "kl": 0.25390625,
1851
- "learning_rate": 8.86695278969957e-07,
1852
- "loss": 0.0254,
1853
- "reward": 0.40625,
1854
- "reward_mean": 0.40625,
1855
- "reward_std": 0.3608423173427582,
1856
- "rewards/accuracy_reward": 0.40625,
1857
- "step": 132
1858
- },
1859
- {
1860
- "advantages": 1.4901161193847656e-08,
1861
- "completion_length": 135.375,
1862
- "epoch": 0.11416309012875536,
1863
- "grad_norm": 4.24266242980957,
1864
- "kl": 0.1650390625,
1865
- "learning_rate": 8.858369098712447e-07,
1866
- "loss": 0.0165,
1867
- "reward": 0.28125,
1868
- "reward_mean": 0.28125,
1869
- "reward_std": 0.378745436668396,
1870
- "rewards/accuracy_reward": 0.28125,
1871
- "step": 133
1872
- },
1873
- {
1874
- "advantages": 0.0,
1875
- "completion_length": 97.59375,
1876
- "epoch": 0.11502145922746781,
1877
- "grad_norm": 6.252864360809326,
1878
- "kl": 0.2177734375,
1879
- "learning_rate": 8.849785407725322e-07,
1880
- "loss": 0.0217,
1881
- "reward": 0.5,
1882
- "reward_mean": 0.5,
1883
- "reward_std": 0.4082317352294922,
1884
- "rewards/accuracy_reward": 0.5,
1885
- "step": 134
1886
- },
1887
- {
1888
- "advantages": 2.0489096641540527e-08,
1889
- "completion_length": 118.8125,
1890
- "epoch": 0.11587982832618025,
1891
- "grad_norm": 5.049542427062988,
1892
- "kl": 0.2060546875,
1893
- "learning_rate": 8.841201716738197e-07,
1894
- "loss": 0.0206,
1895
- "reward": 0.1875,
1896
- "reward_mean": 0.1875,
1897
- "reward_std": 0.3335031569004059,
1898
- "rewards/accuracy_reward": 0.1875,
1899
- "step": 135
1900
- },
1901
- {
1902
- "advantages": -3.725290298461914e-09,
1903
- "completion_length": 120.5625,
1904
- "epoch": 0.1167381974248927,
1905
- "grad_norm": 5.062263011932373,
1906
- "kl": 0.189453125,
1907
- "learning_rate": 8.832618025751073e-07,
1908
- "loss": 0.0189,
1909
- "reward": 0.59375,
1910
- "reward_mean": 0.59375,
1911
- "reward_std": 0.4807935357093811,
1912
- "rewards/accuracy_reward": 0.59375,
1913
- "step": 136
1914
- },
1915
- {
1916
- "advantages": 2.2351741790771484e-08,
1917
- "completion_length": 122.96875,
1918
- "epoch": 0.11759656652360514,
1919
- "grad_norm": 6.447961330413818,
1920
- "kl": 0.19921875,
1921
- "learning_rate": 8.824034334763948e-07,
1922
- "loss": 0.0199,
1923
- "reward": 0.34375,
1924
- "reward_mean": 0.34375,
1925
- "reward_std": 0.4944729208946228,
1926
- "rewards/accuracy_reward": 0.34375,
1927
- "step": 137
1928
- },
1929
- {
1930
- "advantages": 1.862645149230957e-08,
1931
- "completion_length": 125.3125,
1932
- "epoch": 0.1184549356223176,
1933
- "grad_norm": 6.920899868011475,
1934
- "kl": 0.208984375,
1935
- "learning_rate": 8.815450643776824e-07,
1936
- "loss": 0.0209,
1937
- "reward": 0.28125,
1938
- "reward_mean": 0.28125,
1939
- "reward_std": 0.4534739851951599,
1940
- "rewards/accuracy_reward": 0.28125,
1941
- "step": 138
1942
- },
1943
- {
1944
- "advantages": -3.725290298461914e-09,
1945
- "completion_length": 128.4375,
1946
- "epoch": 0.11931330472103004,
1947
- "grad_norm": 4.800882339477539,
1948
- "kl": 0.173828125,
1949
- "learning_rate": 8.806866952789699e-07,
1950
- "loss": 0.0173,
1951
- "reward": 0.5,
1952
- "reward_mean": 0.5,
1953
- "reward_std": 0.4492306709289551,
1954
- "rewards/accuracy_reward": 0.5,
1955
- "step": 139
1956
- },
1957
- {
1958
- "advantages": 1.862645149230957e-08,
1959
- "completion_length": 108.1875,
1960
- "epoch": 0.12017167381974249,
1961
- "grad_norm": 22.795358657836914,
1962
- "kl": 0.419921875,
1963
- "learning_rate": 8.798283261802575e-07,
1964
- "loss": 0.042,
1965
- "reward": 0.15625,
1966
- "reward_mean": 0.15625,
1967
- "reward_std": 0.3808925747871399,
1968
- "rewards/accuracy_reward": 0.15625,
1969
- "step": 140
1970
- },
1971
- {
1972
- "advantages": 1.1175870895385742e-08,
1973
- "completion_length": 141.84375,
1974
- "epoch": 0.12103004291845494,
1975
- "grad_norm": 6.3895745277404785,
1976
- "kl": 0.1962890625,
1977
- "learning_rate": 8.78969957081545e-07,
1978
- "loss": 0.0196,
1979
- "reward": 0.375,
1980
- "reward_mean": 0.375,
1981
- "reward_std": 0.47655022144317627,
1982
- "rewards/accuracy_reward": 0.375,
1983
- "step": 141
1984
- },
1985
- {
1986
- "advantages": 7.450580596923828e-09,
1987
- "completion_length": 115.3125,
1988
- "epoch": 0.12188841201716738,
1989
- "grad_norm": 4.0062103271484375,
1990
- "kl": 0.1669921875,
1991
- "learning_rate": 8.781115879828326e-07,
1992
- "loss": 0.0167,
1993
- "reward": 0.1875,
1994
- "reward_mean": 0.1875,
1995
- "reward_std": 0.249358132481575,
1996
- "rewards/accuracy_reward": 0.1875,
1997
- "step": 142
1998
- },
1999
- {
2000
- "advantages": -3.725290298461914e-09,
2001
- "completion_length": 131.84375,
2002
- "epoch": 0.12274678111587983,
2003
- "grad_norm": 6.299867630004883,
2004
- "kl": 0.2451171875,
2005
- "learning_rate": 8.772532188841201e-07,
2006
- "loss": 0.0246,
2007
- "reward": 0.625,
2008
- "reward_mean": 0.625,
2009
- "reward_std": 0.4671337604522705,
2010
- "rewards/accuracy_reward": 0.625,
2011
- "step": 143
2012
- },
2013
- {
2014
- "advantages": -1.30385160446167e-08,
2015
- "completion_length": 134.3125,
2016
- "epoch": 0.12360515021459227,
2017
- "grad_norm": 5.634101390838623,
2018
- "kl": 0.18359375,
2019
- "learning_rate": 8.763948497854076e-07,
2020
- "loss": 0.0184,
2021
- "reward": 0.625,
2022
- "reward_mean": 0.625,
2023
- "reward_std": 0.4850368797779083,
2024
- "rewards/accuracy_reward": 0.625,
2025
- "step": 144
2026
- },
2027
- {
2028
- "advantages": -2.0489096641540527e-08,
2029
- "completion_length": 124.90625,
2030
- "epoch": 0.12446351931330472,
2031
- "grad_norm": 5.229036331176758,
2032
- "kl": 0.1435546875,
2033
- "learning_rate": 8.755364806866952e-07,
2034
- "loss": 0.0143,
2035
- "reward": 0.53125,
2036
- "reward_mean": 0.53125,
2037
- "reward_std": 0.4628904461860657,
2038
- "rewards/accuracy_reward": 0.53125,
2039
- "step": 145
2040
- },
2041
- {
2042
- "advantages": 7.450580596923828e-09,
2043
- "completion_length": 121.59375,
2044
- "epoch": 0.12532188841201716,
2045
- "grad_norm": 4.288768768310547,
2046
- "kl": 0.1875,
2047
- "learning_rate": 8.746781115879828e-07,
2048
- "loss": 0.0188,
2049
- "reward": 0.3125,
2050
- "reward_mean": 0.3125,
2051
- "reward_std": 0.3745020925998688,
2052
- "rewards/accuracy_reward": 0.3125,
2053
- "step": 146
2054
- },
2055
- {
2056
- "advantages": 2.60770320892334e-08,
2057
- "completion_length": 145.53125,
2058
- "epoch": 0.12618025751072962,
2059
- "grad_norm": 4.884040355682373,
2060
- "kl": 0.189453125,
2061
- "learning_rate": 8.738197424892704e-07,
2062
- "loss": 0.0189,
2063
- "reward": 0.375,
2064
- "reward_mean": 0.375,
2065
- "reward_std": 0.5081326961517334,
2066
- "rewards/accuracy_reward": 0.375,
2067
- "step": 147
2068
- },
2069
- {
2070
- "advantages": 2.60770320892334e-08,
2071
- "completion_length": 129.34375,
2072
- "epoch": 0.12703862660944207,
2073
- "grad_norm": 5.560000419616699,
2074
- "kl": 0.1904296875,
2075
- "learning_rate": 8.729613733905579e-07,
2076
- "loss": 0.019,
2077
- "reward": 0.375,
2078
- "reward_mean": 0.375,
2079
- "reward_std": 0.5081326961517334,
2080
- "rewards/accuracy_reward": 0.375,
2081
- "step": 148
2082
- },
2083
- {
2084
- "advantages": 9.313225746154785e-09,
2085
- "completion_length": 125.46875,
2086
- "epoch": 0.1278969957081545,
2087
- "grad_norm": 5.247032642364502,
2088
- "kl": 0.14453125,
2089
- "learning_rate": 8.721030042918455e-07,
2090
- "loss": 0.0145,
2091
- "reward": 0.46875,
2092
- "reward_mean": 0.46875,
2093
- "reward_std": 0.5302791595458984,
2094
- "rewards/accuracy_reward": 0.46875,
2095
- "step": 149
2096
- },
2097
- {
2098
- "advantages": 1.1175870895385742e-08,
2099
- "completion_length": 123.5625,
2100
- "epoch": 0.12875536480686695,
2101
- "grad_norm": 6.392980098724365,
2102
- "kl": 0.193359375,
2103
- "learning_rate": 8.71244635193133e-07,
2104
- "loss": 0.0193,
2105
- "reward": 0.5,
2106
- "reward_mean": 0.5,
2107
- "reward_std": 0.5081326961517334,
2108
- "rewards/accuracy_reward": 0.5,
2109
- "step": 150
2110
- },
2111
- {
2112
- "advantages": -1.4901161193847656e-08,
2113
- "completion_length": 125.84375,
2114
- "epoch": 0.1296137339055794,
2115
- "grad_norm": 5.506472587585449,
2116
- "kl": 0.2099609375,
2117
- "learning_rate": 8.703862660944206e-07,
2118
- "loss": 0.021,
2119
- "reward": 0.5625,
2120
- "reward_mean": 0.5625,
2121
- "reward_std": 0.5260357856750488,
2122
- "rewards/accuracy_reward": 0.5625,
2123
- "step": 151
2124
- },
2125
- {
2126
- "advantages": 5.587935447692871e-09,
2127
- "completion_length": 111.53125,
2128
- "epoch": 0.13047210300429185,
2129
- "grad_norm": 8.247237205505371,
2130
- "kl": 0.2578125,
2131
- "learning_rate": 8.695278969957082e-07,
2132
- "loss": 0.0258,
2133
- "reward": 0.40625,
2134
- "reward_mean": 0.40625,
2135
- "reward_std": 0.4944729208946228,
2136
- "rewards/accuracy_reward": 0.40625,
2137
- "step": 152
2138
- },
2139
- {
2140
- "advantages": -1.862645149230957e-08,
2141
- "completion_length": 135.6875,
2142
- "epoch": 0.1313304721030043,
2143
- "grad_norm": 5.90345573425293,
2144
- "kl": 0.197265625,
2145
- "learning_rate": 8.686695278969956e-07,
2146
- "loss": 0.0197,
2147
- "reward": 0.5625,
2148
- "reward_mean": 0.5625,
2149
- "reward_std": 0.49022960662841797,
2150
- "rewards/accuracy_reward": 0.5625,
2151
- "step": 153
2152
- },
2153
- {
2154
- "advantages": 1.4901161193847656e-08,
2155
- "completion_length": 134.53125,
2156
- "epoch": 0.13218884120171673,
2157
- "grad_norm": 5.609891891479492,
2158
- "kl": 0.171875,
2159
- "learning_rate": 8.678111587982832e-07,
2160
- "loss": 0.0172,
2161
- "reward": 0.40625,
2162
- "reward_mean": 0.40625,
2163
- "reward_std": 0.5123760104179382,
2164
- "rewards/accuracy_reward": 0.40625,
2165
- "step": 154
2166
- },
2167
- {
2168
- "advantages": -1.1175870895385742e-08,
2169
- "completion_length": 122.59375,
2170
- "epoch": 0.13304721030042918,
2171
- "grad_norm": 4.156961917877197,
2172
- "kl": 0.1435546875,
2173
- "learning_rate": 8.669527896995707e-07,
2174
- "loss": 0.0143,
2175
- "reward": 0.53125,
2176
- "reward_mean": 0.53125,
2177
- "reward_std": 0.3377464711666107,
2178
- "rewards/accuracy_reward": 0.53125,
2179
- "step": 155
2180
- },
2181
- {
2182
- "advantages": -1.862645149230957e-09,
2183
- "completion_length": 104.71875,
2184
- "epoch": 0.13390557939914163,
2185
- "grad_norm": 7.677206993103027,
2186
- "kl": 0.291015625,
2187
- "learning_rate": 8.660944206008583e-07,
2188
- "loss": 0.0291,
2189
- "reward": 0.21875,
2190
- "reward_mean": 0.21875,
2191
- "reward_std": 0.24511480331420898,
2192
- "rewards/accuracy_reward": 0.21875,
2193
- "step": 156
2194
- },
2195
- {
2196
- "advantages": -1.30385160446167e-08,
2197
- "completion_length": 130.375,
2198
- "epoch": 0.13476394849785409,
2199
- "grad_norm": 4.416824817657471,
2200
- "kl": 0.166015625,
2201
- "learning_rate": 8.652360515021458e-07,
2202
- "loss": 0.0166,
2203
- "reward": 0.625,
2204
- "reward_mean": 0.625,
2205
- "reward_std": 0.4492306709289551,
2206
- "rewards/accuracy_reward": 0.625,
2207
- "step": 157
2208
- },
2209
- {
2210
- "advantages": 1.6763806343078613e-08,
2211
- "completion_length": 119.46875,
2212
- "epoch": 0.1356223175965665,
2213
- "grad_norm": 3.8171494007110596,
2214
- "kl": 0.1728515625,
2215
- "learning_rate": 8.643776824034334e-07,
2216
- "loss": 0.0173,
2217
- "reward": 0.15625,
2218
- "reward_mean": 0.15625,
2219
- "reward_std": 0.24511480331420898,
2220
- "rewards/accuracy_reward": 0.15625,
2221
- "step": 158
2222
- },
2223
- {
2224
- "advantages": 9.313225746154785e-09,
2225
- "completion_length": 107.59375,
2226
- "epoch": 0.13648068669527896,
2227
- "grad_norm": 5.097965717315674,
2228
- "kl": 0.1787109375,
2229
- "learning_rate": 8.63519313304721e-07,
2230
- "loss": 0.0179,
2231
- "reward": 0.59375,
2232
- "reward_mean": 0.59375,
2233
- "reward_std": 0.38816186785697937,
2234
- "rewards/accuracy_reward": 0.59375,
2235
- "step": 159
2236
- },
2237
- {
2238
- "advantages": 1.30385160446167e-08,
2239
- "completion_length": 110.875,
2240
- "epoch": 0.13733905579399142,
2241
- "grad_norm": 5.293290138244629,
2242
- "kl": 0.169921875,
2243
- "learning_rate": 8.626609442060086e-07,
2244
- "loss": 0.017,
2245
- "reward": 0.375,
2246
- "reward_mean": 0.375,
2247
- "reward_std": 0.4492306709289551,
2248
- "rewards/accuracy_reward": 0.375,
2249
- "step": 160
2250
- }
2251
- ],
2252
- "logging_steps": 1.0,
2253
- "max_steps": 1165,
2254
- "num_input_tokens_seen": 0,
2255
- "num_train_epochs": 1,
2256
- "save_steps": 10,
2257
- "stateful_callbacks": {
2258
- "TrainerControl": {
2259
- "args": {
2260
- "should_epoch_stop": false,
2261
- "should_evaluate": false,
2262
- "should_log": false,
2263
- "should_save": true,
2264
- "should_training_stop": false
2265
- },
2266
- "attributes": {}
2267
- }
2268
- },
2269
- "total_flos": 0.0,
2270
- "train_batch_size": 1,
2271
- "trial_name": null,
2272
- "trial_params": null
2273
- }
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
checkpoint-160/training_args.bin DELETED
@@ -1,3 +0,0 @@
1
- version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8738459828a454257f90ce379157027577338807999ffff54bb828d59425343
3
- size 8312
 
 
 
 
checkpoint-160/vocab.json DELETED
The diff for this file is too large to render. See raw diff