import numpy as np
from torch import nn
from torch.nn.utils import weight_norm


class MelganDiscriminator(nn.Module):
    """A single MelGAN discriminator operating directly on the raw waveform.

    The network is fully convolutional: an input of shape
    ``(batch, in_channels, time)`` is mapped to a downsampled score map,
    and the output of every layer is also collected so it can be used for
    the feature-matching loss during GAN training.
    """

    def __init__(
        self,
        in_channels=1,
        out_channels=1,
        kernel_sizes=(5, 3),
        base_channels=16,
        max_channels=1024,
        downsample_factors=(4, 4, 4, 4),
        groups_denominator=4,
    ):
        super().__init__()
        self.layers = nn.ModuleList()

        # Initial layer: the kernel size is the product of the two head
        # kernel sizes (5 * 3 = 15 by default); reflection padding keeps
        # the time dimension unchanged at stride 1.
        layer_kernel_size = int(np.prod(kernel_sizes))
        layer_padding = (layer_kernel_size - 1) // 2
        self.layers += [
            nn.Sequential(
                nn.ReflectionPad1d(layer_padding),
                weight_norm(nn.Conv1d(in_channels, base_channels, layer_kernel_size, stride=1)),
                nn.LeakyReLU(0.2, inplace=True),
            )
        ]

        # Strided, grouped convolutions downsample the signal. Each stage
        # widens the channels by its downsample factor (capped at
        # ``max_channels``) and uses a kernel of ``10 * factor + 1`` so the
        # receptive field comfortably covers the stride.
        layer_in_channels = base_channels
        for downsample_factor in downsample_factors:
            layer_out_channels = min(layer_in_channels * downsample_factor, max_channels)
            layer_kernel_size = downsample_factor * 10 + 1
            layer_padding = (layer_kernel_size - 1) // 2
            layer_groups = layer_in_channels // groups_denominator
            self.layers += [
                nn.Sequential(
                    weight_norm(
                        nn.Conv1d(
                            layer_in_channels,
                            layer_out_channels,
                            kernel_size=layer_kernel_size,
                            stride=downsample_factor,
                            padding=layer_padding,
                            groups=layer_groups,
                        )
                    ),
                    nn.LeakyReLU(0.2, inplace=True),
                )
            ]
            layer_in_channels = layer_out_channels

        # Two output heads: a stride-1 convolution followed by a final
        # projection down to ``out_channels``. The last layer has no
        # activation; the raw score map is returned as-is.
        layer_padding1 = (kernel_sizes[0] - 1) // 2
        layer_padding2 = (kernel_sizes[1] - 1) // 2
        self.layers += [
            nn.Sequential(
                weight_norm(
                    nn.Conv1d(
                        layer_out_channels,
                        layer_out_channels,
                        kernel_size=kernel_sizes[0],
                        stride=1,
                        padding=layer_padding1,
                    )
                ),
                nn.LeakyReLU(0.2, inplace=True),
            ),
            weight_norm(
                nn.Conv1d(
                    layer_out_channels,
                    out_channels,
                    kernel_size=kernel_sizes[1],
                    stride=1,
                    padding=layer_padding2,
                )
            ),
        ]

    def forward(self, x):
        """Return the final score map plus every intermediate feature map."""
        feats = []
        for layer in self.layers:
            x = layer(x)
            feats.append(x)
        return x, feats
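

if __name__ == "__main__":
    # Minimal smoke test (illustrative; not part of the original module).
    # It feeds one second of random "audio" through a default-configured
    # discriminator and prints the score-map and per-layer feature shapes.
    # The 22050-sample length and batch size of 2 are arbitrary assumptions.
    import torch

    disc = MelganDiscriminator()
    waveform = torch.randn(2, 1, 22050)  # (batch, in_channels, samples)
    score, feats = disc(waveform)
    print("score:", tuple(score.shape))  # heavily downsampled score map
    for i, f in enumerate(feats):
        print(f"layer {i} features:", tuple(f.shape))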