# FloraBERT: config.yaml
# Project-wide configuration settings

# Variables for train-test-split
TRAIN_SIZE: 0.7

# General parameters
max_len: 1000
num_tissues: 8
expressed_threshold: 0.1
random_seed: 766
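
# DNABERT baseline settings: presumably the k-mer size used for tokenization
# and the model's maximum input length in tokens.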
dnabert:
  max_seq_len: 512
  kmer: 6
  test_size: 0.2
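
# Tokenizer settings: the vocabulary size for FloraBERT's own sequence
# tokenizer (assumed to be a learned subword/BPE vocabulary).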
tokenizer:
  vocab_size: 5000
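
# Dataset preprocessing: maximum raw sequence length, held-out test fraction,
# and number of prediction targets (presumably one per tissue, matching num_tissues).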
data:
  max_seq_len: 1000
  test_size: 0.2
  num_labels: 8
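
# Hyperparameters for the two training stages: masked-language-model pretraining
# (mlm_prob is the masking probability) and supervised fine-tuning.
# With gradient accumulation, the effective pretraining batch is
# 64 * 25 = 1600 sequences per device.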
training:
  pretrain:
    num_train_epochs: 3
    per_device_train_batch_size: 64
    per_device_eval_batch_size: 64
    fp16: true
    logging_steps: 50
    eval_steps: 200
    save_steps: 100
    save_total_limit: 20
    gradient_accumulation_steps: 25
    learning_rate: 1.e-4
    weight_decay: 0
    adam_epsilon: 1.e-8
    max_grad_norm: 10
    warmup_steps: 50
    optimizer: "lamb"
    scheduler: "linear"
    mlm_prob: 0.15
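
  # Fine-tuning stage, presumably on the tissue-level expression task;
  # commented-out entries appear to be alternative settings that were tried.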
  finetune:
    # num_train_epochs: 10
    num_train_epochs: 3
    per_device_train_batch_size: 64
    per_device_eval_batch_size: 8
    fp16: true
    logging_steps: 50
    eval_steps: 500
    save_steps: 500
    save_total_limit: 10
    # gradient_accumulation_steps: 1
    gradient_accumulation_steps: 10
    eval_accumulation_steps: 64
    learning_rate: 1.e-3
    # learning_rate: 1.e-1
    # lr: 1.e-3
    betas:
      - 0.9
      - 0.999
    eps: 1.e-8
    weight_decay: 0
    adam_epsilon: 1.e-8
    max_grad_norm: 10
    warmup_steps: 200
    num_cooldown_steps: 2000
    optimizer: "lamb"
    # optimizer: "adamw"
    # scheduler: "delay"
    scheduler: "constant"
    # num_param_groups: 0
    # param_group_size: 2  # Except for the classification head, which has param_group_size == 1
    delay_size: 0
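
# Per-model architecture and prediction-head settings; "{}" means the model
# uses defaults. For roberta-base, block_size appears to be max_tokenized_len
# plus two special tokens.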
models:
  roberta-base:
    num_attention_heads: 6
    num_hidden_layers: 6
    type_vocab_size: 1
    block_size: 258
    max_tokenized_len: 256
  roberta-lm: {}
  roberta-pred: {}
  roberta-pred-mean-pool:
    hidden_dropout_prob: 0.2
    output_mode: "regression"
    # For sparse (bce + mse) loss:
    # output_mode: "sparse"
    threshold: 1
    alpha: 0.1
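
  # DNABERT counterparts of the RoBERTa entries above; block_size again allows
  # max_tokenized_len plus two special tokens (assumption).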
  dnabert-base:
    block_size: 512
    max_tokenized_len: 510
  dnabert-lm: {}
  dnabert-pred: {}
  dnabert-pred-mean-pool:
    hidden_dropout_prob: 0.2
    output_mode: "regression"