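# Configuration covering data preprocessing, tokenization, pre-training,
# fine-tuning, and model definitions for the DNABERT and RoBERTa variants.
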
TRAIN_SIZE: 0.7
max_len: 1000
num_tissues: 8
expressed_threshold: 0.1
random_seed: 766

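# DNABERT preprocessing: sequences are assumed to be tokenized into
# overlapping 6-mers (kmer: 6) and capped at max_seq_len tokens,
# matching DNABERT's 512-token input limit.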
dnabert:
  max_seq_len: 512
  kmer: 6
  test_size: 0.2

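# Vocabulary size for the learned tokenizer (the tokenizer type is not
# specified here and is assumed to be trained on the input sequences).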
tokenizer:
  vocab_size: 5000

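# Dataset settings for the prediction task; num_labels is assumed to
# correspond to num_tissues above (one target per tissue).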
data:
  max_seq_len: 1000
  test_size: 0.2
  num_labels: 8

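# Two training stages: masked-LM pre-training (mlm_prob sets the masking
# rate) and supervised fine-tuning. With gradient accumulation, the
# effective batch size per device is per_device_train_batch_size x
# gradient_accumulation_steps: 64 x 25 = 1600 sequences for pre-training
# and 64 x 10 = 640 for fine-tuning (assuming a single device).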
training:
  pretrain:
    num_train_epochs: 3
    per_device_train_batch_size: 64
    per_device_eval_batch_size: 64
    fp16: true
    logging_steps: 50
    eval_steps: 200
    save_steps: 100
    save_total_limit: 20
    gradient_accumulation_steps: 25
    learning_rate: 1.e-4
    weight_decay: 0
    adam_epsilon: 1.e-8
    max_grad_norm: 10
    warmup_steps: 50
    optimizer: "lamb"
    scheduler: "linear"
    mlm_prob: 0.15

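  # Fine-tuning stage: the LAMB optimizer is used here as well, with a
  # constant schedule; betas and eps below are assumed to be the
  # optimizer's (beta1, beta2) moment coefficients and numerical epsilon.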
  finetune:
    num_train_epochs: 3
    per_device_train_batch_size: 64
    per_device_eval_batch_size: 8
    fp16: true
    logging_steps: 50
    eval_steps: 500
    save_steps: 500
    save_total_limit: 10
    gradient_accumulation_steps: 10
    eval_accumulation_steps: 64
    learning_rate: 1.e-3
    betas:
      - 0.9
      - 0.999
    eps: 1.e-8
    weight_decay: 0
    adam_epsilon: 1.e-8
    max_grad_norm: 10
    warmup_steps: 200
    num_cooldown_steps: 2000
    optimizer: "lamb"
    scheduler: "constant"
    delay_size: 0

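# Model variants. The *-base entries hold architecture/tokenization
# settings, the *-lm entries are the language-model heads used for
# pre-training, and *-pred / *-pred-mean-pool are the prediction heads
# used for fine-tuning (interpretation assumed from the key names).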
models:
  roberta-base:
    num_attention_heads: 6
    num_hidden_layers: 6
    type_vocab_size: 1
    block_size: 258
    max_tokenized_len: 256
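    # block_size (258) is assumed to be max_tokenized_len (256) plus the
    # two special tokens RoBERTa adds around each sequence (<s> and </s>).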
  roberta-lm: {}
  roberta-pred: {}
  roberta-pred-mean-pool:
    hidden_dropout_prob: 0.2
    output_mode: "regression"
    threshold: 1
    alpha: 0.1
  dnabert-base:
    block_size: 512
    max_tokenized_len: 510
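    # 510 content tokens plus [CLS] and [SEP] give BERT's standard
    # 512-token maximum, which block_size above is assumed to reflect.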
  dnabert-lm: {}
  dnabert-pred: {}
  dnabert-pred-mean-pool:
    hidden_dropout_prob: 0.2
    output_mode: "regression"