# Source: GazeGenie / models / BERT_fin_exp_20240107-152040.yaml
# hugpv - initial commit da572bf - 2.45 kB
# Run settings stored as one flat mapping; keys are listed alphabetically.
add_layer_norm_to_char_mlp: true
add_layer_norm_to_in_projection: false
add_line_overlap_feature: true
add_normalised_values_as_features: false
change_pooling_for_timm_head_to: AdaptiveAvgPool2d
char_dims: 0
# Two-element list, both 224 (the same number that ends cv_char_modelname
# below) - presumably the character-image height/width; TODO confirm.
char_plot_shape:
- 224
- 224
chars_bert_reduction_factor: 4
chars_conv_lr_reduction_factor: 1
chars_conv_pooling_out_dim: 1
convert_posix: false
convert_winpath: false
cv_char_modelname: coatnet_nano_rw_224
cv_modelname: null
# NOTE(review): use_early_stopping is false further down in this file, so
# this patience value is presumably not used - confirm.
early_stopping_patience: 15
# null, while lr_scheduling below is StepLR; presumably only the
# gamma_step_* values apply to this run - confirm against the consumer.
gamma_multistep: null
gamma_step_factor: 0.5
gamma_step_size: 3000
head_multiplication_factor: 64
hidden_dim_bert: 512
hidden_dropout_prob: 0.0
im_partial_string: fixations_chars_channel_sep
input_padding_val: 10
last_activation: Identity
layer_norm_after_in_projection: true
linear_activation: GELU
load_best_checkpoint_at_end: false
loss_function: corn_loss
lr: 0.0004
# NOTE(review): the quotes make this load as the string '0.0004', whereas
# `lr` above loads as a float - confirm the consumer expects a string here.
lr_initial: '0.0004'
lr_sched_exp_fac: null
lr_scheduling: StepLR
# Same value as max_seq_length below; set_max_seq_len_manually is true
# further down, which presumably is what makes this value take effect -
# TODO confirm.
manual_max_sequence_for_model: 500
max_len_chars_list: 0
max_seq_length: 500
method_chars_into_model: resnet
method_to_include_char_positions: concat
# Written with an explicit mantissa dot on purpose: YAML 1.1 loaders such as
# PyYAML only resolve exponent notation to a float when the mantissa contains
# a ".", so a bare `1e-6` is loaded as the string "1e-6" instead of a number.
min_lr_anneal: 1.0e-6
# Remaining run settings (alphabetical order continues).
model_to_use: BERT
multistep_milestones: null
n_layers_BERT: 4
norm_by_char_averages: false
norm_by_line_width: false
norm_coords_by_letter_min_x_y: true
normalize_by_line_height_and_width: true
num_attention_heads: 8
# Same value as ord_reg_loss_max below; set_num_classes_manually is true
# further down - presumably these are meant to stay in sync, confirm.
num_classes: 16
num_lin_layers: 1
num_warmup_steps: 3000
one_hot_y: false
ord_reg_loss_max: 16
ord_reg_loss_min: -1
padding_at_end: true
plot_histogram: true
plot_learning_curves: true
# Plain scalar with a non-numeric suffix, so it loads as the string
# "16-mixed", not as a number.
precision: 16-mixed
prediction_only: false
pretrained_model_name_to_load: null
profile_torch_run: false
reload_model: false
reload_model_date: null
remove_eval_idx_from_train_idx: true
remove_timm_classifier_head_pooling: true
# NOTE(review): two columns are listed here, but sample_means, sample_std and
# sample_std_unscaled below each hold three values - confirm which feature
# the third entry belongs to (possibly the one enabled by
# add_line_overlap_feature; unverified).
sample_cols:
- x
- y
sample_means:
- 0.4423
- 3.1164
- 2.4717
sample_std:
- 0.2778
- 1.882
- 1.8562
# The third entry is identical to the third entry of sample_std above; only
# the first two differ between the two lists.
sample_std_unscaled:
- 285.193
- 131.1842
- 1.8562
save_weights_only: true
set_max_seq_len_manually: true
set_num_classes_manually: true
source_for_pretrained_cv_model: timm
target_padding_number: -100
track_activations_via_hook: false
track_gradient_histogram: false
use_char_bounding_boxes: true
use_early_stopping: false
use_embedded_char_pos_info: true
use_fixation_duration_information: false
use_in_projection_bias: false
use_lr_warmup: true
use_pupil_size_information: false
use_reduce_on_plateau: false
use_start_time_as_input_col: false
use_training_steps_for_end_and_lr_decay: true
use_words_coords: false
warmup_exponent: 1
weight_decay: 0.0