# #################################
# Basic inference parameters for speaker-id. We first have a network that
# computes some embeddings. On top of that, we employ a classifier.
#
# Author:
#  * Mirco Ravanelli 2021
# #################################

# NOTE(review): this file had been collapsed onto a single comment line and
# the `<...>` targets of every `!ref` tag were missing. The targets below were
# restored from the matching key names defined in this file -- confirm them
# against the hyperparameters shipped with the pretrained checkpoint.

# Pretrain folder: source from which the checkpoint files listed under
# `pretrainer.paths` are resolved.
pretrained_path: jefson08/spkrec-ecapa-voxceleb-kha

# Model parameters
n_mels: 23
sample_rate: 16000
n_classes: 28  # In this case, we have 28 speakers
emb_dim: 512  # dimensionality of the embeddings

# Feature extraction
compute_features: !new:speechbrain.lobes.features.Fbank
  n_mels: !ref <n_mels>

# Mean and std normalization of the input features
mean_var_norm: !new:speechbrain.processing.features.InputNormalization
  norm_type: sentence
  std_norm: False

# To design a custom model, either just edit the simple CustomModel
# class that's listed here, or replace this `!new` call with a line
# pointing to a different file you've defined.
embedding_model: !new:custom_model.Xvector
  in_channels: !ref <n_mels>
  activation: !name:torch.nn.LeakyReLU
  tdnn_blocks: 5
  # The three lists below each have `tdnn_blocks` (5) entries.
  tdnn_channels: [512, 512, 512, 512, 1500]
  tdnn_kernel_sizes: [5, 3, 3, 1, 1]
  tdnn_dilations: [1, 2, 3, 1, 1]
  lin_neurons: !ref <emb_dim>

# Classifier applied on top of the embeddings; its last input dimension is
# tied to the embedding size and its output size to the number of speakers.
classifier: !new:custom_model.Classifier
  input_shape: [null, null, !ref <emb_dim>]
  activation: !name:torch.nn.LeakyReLU
  lin_blocks: 1
  lin_neurons: !ref <emb_dim>
  out_neurons: !ref <n_classes>

label_encoder: !new:speechbrain.dataio.encoder.CategoricalEncoder

# Objects in "modules" dict will have their parameters moved to the correct
# device, as well as having train()/eval() called on them by the Brain class.
modules:
  compute_features: !ref <compute_features>
  embedding_model: !ref <embedding_model>
  classifier: !ref <classifier>
  mean_var_norm: !ref <mean_var_norm>

# Maps each loadable object to the checkpoint file it is restored from.
# Every key under `paths` must have a matching key under `loadables`.
pretrainer: !new:speechbrain.utils.parameter_transfer.Pretrainer
  loadables:
    embedding_model: !ref <embedding_model>
    classifier: !ref <classifier>
    label_encoder: !ref <label_encoder>
  paths:
    embedding_model: !ref <pretrained_path>/embedding_model.ckpt
    classifier: !ref <pretrained_path>/classifier.ckpt
    label_encoder: !ref <pretrained_path>/label_encoder.txt