# Model configuration for an ABINet text recogniser, expressed as plain
# nested dicts. Each ``type`` string names the component to build; resolving
# those names is done by whatever framework consumes this config.

# Size of the character set shared by the vision decoder, language decoder,
# fuser and loss below.
# NOTE(review): presumably the 36 symbols of ``DICT36`` plus one special
# token -- confirm against the label convertor before changing either value.
num_chars = 37

# Maximum sequence length, passed to every sequence-aware component and to
# the top-level model so they all agree.
max_seq_len = 26

# Label convertor settings (text <-> index mapping is handled by the
# ``ABIConvertor`` implementation, which is not part of this file).
label_convertor = dict(
    type='ABIConvertor',
    dict_type='DICT36',
    with_unknown=False,
    with_padding=False,
    lower=True,
)

model = dict(
    type='ABINet',
    backbone=dict(type='ResNetABI'),
    # Vision branch: a transformer encoder followed by an attention decoder.
    encoder=dict(
        type='ABIVisionModel',
        encoder=dict(
            type='TransformerEncoder',
            n_layers=3,
            n_head=8,
            d_model=512,
            d_inner=2048,
            dropout=0.1,
            # Equals attn_height * attn_width of the vision decoder below;
            # keep the two in sync.
            max_len=8 * 32,
        ),
        decoder=dict(
            type='ABIVisionDecoder',
            in_channels=512,
            num_channels=64,
            attn_height=8,
            attn_width=32,
            attn_mode='nearest',
            use_result='feature',
            num_chars=num_chars,
            max_seq_len=max_seq_len,
            init_cfg=dict(type='Xavier', layer='Conv2d'),
        ),
    ),
    # Language branch.
    decoder=dict(
        type='ABILanguageDecoder',
        d_model=512,
        n_head=8,
        d_inner=2048,
        n_layers=4,
        dropout=0.1,
        detach_tokens=True,
        use_self_attn=False,
        # Padding uses the last class index (num_chars - 1).
        pad_idx=num_chars - 1,
        num_chars=num_chars,
        max_seq_len=max_seq_len,
        init_cfg=None,
    ),
    # Fuser combining the vision and language branches.
    fuser=dict(
        type='ABIFuser',
        d_model=512,
        num_chars=num_chars,
        init_cfg=None,
        max_seq_len=max_seq_len,
    ),
    # Loss with equal weights for the encoder, decoder and fusion terms.
    loss=dict(
        type='ABILoss',
        enc_weight=1.0,
        dec_weight=1.0,
        fusion_weight=1.0,
        num_classes=num_chars,
    ),
    label_convertor=label_convertor,
    max_seq_len=max_seq_len,
    iter_size=3,
)