Spaces:
Sleeping
Sleeping
# Copyright (c) 2023 Amphion. | |
# | |
# This source code is licensed under the MIT license found in the | |
# LICENSE file in the root directory of this source tree. | |
import torch | |
import math | |
from torch import nn | |
from torch.nn import functional as F | |
from .conv import Conv1d as conv_Conv1d | |
def Conv1d(in_channels, out_channels, kernel_size, dropout=0, **kwargs): | |
m = conv_Conv1d(in_channels, out_channels, kernel_size, **kwargs) | |
nn.init.kaiming_normal_(m.weight, nonlinearity="relu") | |
if m.bias is not None: | |
nn.init.constant_(m.bias, 0) | |
return nn.utils.weight_norm(m) | |
def Conv1d1x1(in_channels, out_channels, bias=True): | |
return Conv1d( | |
in_channels, out_channels, kernel_size=1, padding=0, dilation=1, bias=bias | |
) | |
def _conv1x1_forward(conv, x, is_incremental): | |
if is_incremental: | |
x = conv.incremental_forward(x) | |
else: | |
x = conv(x) | |
return x | |
class ResidualConv1dGLU(nn.Module): | |
"""Residual dilated conv1d + Gated linear unit | |
Args: | |
residual_channels (int): Residual input / output channels | |
gate_channels (int): Gated activation channels. | |
kernel_size (int): Kernel size of convolution layers. | |
skip_out_channels (int): Skip connection channels. If None, set to same | |
as ``residual_channels``. | |
cin_channels (int): Local conditioning channels. If negative value is | |
set, local conditioning is disabled. | |
dropout (float): Dropout probability. | |
padding (int): Padding for convolution layers. If None, proper padding | |
is computed depends on dilation and kernel_size. | |
dilation (int): Dilation factor. | |
""" | |
def __init__( | |
self, | |
residual_channels, | |
gate_channels, | |
kernel_size, | |
skip_out_channels=None, | |
cin_channels=-1, | |
dropout=1 - 0.95, | |
padding=None, | |
dilation=1, | |
causal=True, | |
bias=True, | |
*args, | |
**kwargs, | |
): | |
super(ResidualConv1dGLU, self).__init__() | |
self.dropout = dropout | |
if skip_out_channels is None: | |
skip_out_channels = residual_channels | |
if padding is None: | |
# no future time stamps available | |
if causal: | |
padding = (kernel_size - 1) * dilation | |
else: | |
padding = (kernel_size - 1) // 2 * dilation | |
self.causal = causal | |
self.conv = Conv1d( | |
residual_channels, | |
gate_channels, | |
kernel_size, | |
padding=padding, | |
dilation=dilation, | |
bias=bias, | |
*args, | |
**kwargs, | |
) | |
# mel conditioning | |
self.conv1x1c = Conv1d1x1(cin_channels, gate_channels, bias=False) | |
gate_out_channels = gate_channels // 2 | |
self.conv1x1_out = Conv1d1x1(gate_out_channels, residual_channels, bias=bias) | |
self.conv1x1_skip = Conv1d1x1(gate_out_channels, skip_out_channels, bias=bias) | |
def forward(self, x, c=None): | |
return self._forward(x, c, False) | |
def incremental_forward(self, x, c=None): | |
return self._forward(x, c, True) | |
def clear_buffer(self): | |
for c in [ | |
self.conv, | |
self.conv1x1_out, | |
self.conv1x1_skip, | |
self.conv1x1c, | |
]: | |
if c is not None: | |
c.clear_buffer() | |
def _forward(self, x, c, is_incremental): | |
"""Forward | |
Args: | |
x (Tensor): B x C x T | |
c (Tensor): B x C x T, Mel conditioning features | |
Returns: | |
Tensor: output | |
""" | |
residual = x | |
x = F.dropout(x, p=self.dropout, training=self.training) | |
if is_incremental: | |
splitdim = -1 | |
x = self.conv.incremental_forward(x) | |
else: | |
splitdim = 1 | |
x = self.conv(x) | |
# remove future time steps | |
x = x[:, :, : residual.size(-1)] if self.causal else x | |
a, b = x.split(x.size(splitdim) // 2, dim=splitdim) | |
assert self.conv1x1c is not None | |
c = _conv1x1_forward(self.conv1x1c, c, is_incremental) | |
ca, cb = c.split(c.size(splitdim) // 2, dim=splitdim) | |
a, b = a + ca, b + cb | |
x = torch.tanh(a) * torch.sigmoid(b) | |
# For skip connection | |
s = _conv1x1_forward(self.conv1x1_skip, x, is_incremental) | |
# For residual connection | |
x = _conv1x1_forward(self.conv1x1_out, x, is_incremental) | |
x = (x + residual) * math.sqrt(0.5) | |
return x, s | |