train model
Browse files
- scripts/model.yaml +14 -17
scripts/model.yaml
CHANGED
@@ -108,26 +108,23 @@ eval:
|
|
108 |
final_validation: false
|
109 |
|
110 |
# Optimizer-related arguments
|
111 |
-
optimizer:
|
112 |
-
|
113 |
-
#
|
114 |
-
|
115 |
-
#
|
116 |
-
# # class_path: bitsandbytes.optim.PagedAdamW
|
117 |
-
# # class_path: bitsandbytes.optim.AdamW8bit
|
118 |
-
# # class_path: bitsandbytes.optim.PagedAdamW8bit
|
119 |
|
120 |
-
|
121 |
-
#
|
122 |
-
|
123 |
|
124 |
-
#
|
125 |
-
|
126 |
|
127 |
-
#
|
128 |
-
|
129 |
-
|
130 |
-
|
131 |
|
132 |
# How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
|
133 |
devices: auto
|
|
|
108 |
final_validation: false
|
109 |
|
110 |
# Optimizer-related arguments
|
111 |
+
optimizer:
|
112 |
+
# class_path: torch.optim.AdamW
|
113 |
+
# class_path: grokadamw.GrokAdamW
|
114 |
+
class_path: bitsandbytes.optim.AdamW8bit
|
115 |
+
# class_path: bitsandbytes.optim.PagedAdamW8bit
|
|
|
|
|
|
|
116 |
|
117 |
+
init_args:
|
118 |
+
# (type: float, default: 0.001)
|
119 |
+
lr: 5e-5
|
120 |
|
121 |
+
# (type: float, default: 0.01)
|
122 |
+
weight_decay: 0.1
|
123 |
|
124 |
+
# (type: tuple, default: (0.9,0.999))
|
125 |
+
betas:
|
126 |
+
- 0.9
|
127 |
+
- 0.95
|
128 |
|
129 |
# How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
|
130 |
devices: auto
|