mtasic85 committed on
Commit
c689557
1 Parent(s): bb639be

train model

Browse files
Files changed (1) hide show
  1. scripts/model.yaml +14 -17
scripts/model.yaml CHANGED
@@ -108,26 +108,23 @@ eval:
108
  final_validation: false
109
 
110
  # Optimizer-related arguments
111
- optimizer: bitsandbytes.optim.PagedAdamW8bit
112
-
113
- # optimizer:
114
- # class_path: torch.optim.AdamW
115
- # # class_path: grokadamw.GrokAdamW
116
- # # class_path: bitsandbytes.optim.PagedAdamW
117
- # # class_path: bitsandbytes.optim.AdamW8bit
118
- # # class_path: bitsandbytes.optim.PagedAdamW8bit
119
 
120
- # init_args:
121
- # # (type: float, default: 0.001)
122
- # lr: 5e-5
123
 
124
- # # (type: float, default: 0.01)
125
- # weight_decay: 0.1
126
 
127
- # # (type: tuple, default: (0.9,0.999))
128
- # betas:
129
- # - 0.9
130
- # - 0.95
131
 
132
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
133
  devices: auto
 
108
  final_validation: false
109
 
110
  # Optimizer-related arguments
111
+ optimizer:
112
+ # class_path: torch.optim.AdamW
113
+ # class_path: grokadamw.GrokAdamW
114
+ class_path: bitsandbytes.optim.AdamW8bit
115
+ # class_path: bitsandbytes.optim.PagedAdamW8bit
 
 
 
116
 
117
+ init_args:
118
+ # (type: float, default: 0.001)
119
+ lr: 5e-5
120
 
121
+ # (type: float, default: 0.01)
122
+ weight_decay: 0.1
123
 
124
+ # (type: tuple, default: (0.9,0.999))
125
+ betas:
126
+ - 0.9
127
+ - 0.95
128
 
129
  # How many devices/GPUs to use. Uses all GPUs by default. (type: Union[int, str], default: auto)
130
  devices: auto