# File size: 806 Bytes
# 237581f
import torch.nn.functional as F
# Hyperparameter / run-configuration dictionary.
# The code that consumes these values is not part of this file, so the
# grouping comments below describe only what the literal values show;
# anything beyond that is marked as an assumption to confirm.
hpdict = {
    # Model-size settings.
    'num_layers': 5,
    'd_model': 896,
    # 896 / 14 == 64, which lines up with the "14head_64x64" in the
    # Gcachelst filename below (presumably the per-head width; confirm).
    'num_heads': 14,
    'dff': 2389,
    # Relative path to a pickle file; resolved against the working directory.
    'Gcachelst': './predefined_G_LM_cache_list_IDENTITY_5layer_14head_64x64_paper.pkl',
    'input_vocab_size': 32000,
    'max_seq_len': 1024,
    # Training-run settings.
    'epochs': 1,
    'save_model_path': './PLDRv51G-106M-2-checkpoint',
    # Optimizer / learning-rate schedule settings.
    'warmup_steps': 2000,
    'lr_total_steps': 250000,
    'learning_rate': 0.0003,
    'lr_alpha': 0.1,
    'adamw_decay': 0.1,
    # Stored as the function object itself (not called here).
    'activation': F.silu,
    'disable_amp': False,
    'auto_size_minimum': None,
    # FSDP-related switches. The string values are presumably mapped to
    # torch FSDP enums (ShardingStrategy / BackwardPrefetch) by the
    # consumer -- TODO confirm against the training script.
    'disable_fsdp_mixed_precision': False,
    'fsdp_cpu_offload': False,
    'fsdp_sharding_strategy': 'HYBRID_SHARD',
    'backward_prefetch': 'PRE',
    'save_type': 'torch',
}