File size: 5,484 Bytes
a4d0945
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157

import torch
import torch.nn as nn

class MultiLayeredConv1d(torch.nn.Module):
    """Multi-layered conv1d for Transformer block.

    This is a module of multi-layered conv1d designed to replace the
    position-wise feed-forward network in a Transformer block, which is
    introduced in `FastSpeech: Fast, Robust and Controllable Text to Speech`_.

    Args:
        in_chans (int): Number of input channels.
        hidden_chans (int): Number of hidden channels.
        kernel_size (int): Kernel size of the first conv1d. The padding is
            ``(kernel_size - 1) // 2``, so an odd value keeps the time length
            unchanged while an even value shortens it by one frame.
        dropout_rate (float): Dropout rate.

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    """

    def __init__(
        self, in_chans: int, hidden_chans: int, kernel_size: int, dropout_rate: float
    ):
        # Zero-argument super(): the two-argument form resolves the class by
        # its global name at call time and fails with TypeError if that name
        # has been rebound to a different class.
        super().__init__()
        self.w_1 = torch.nn.Conv1d(
            in_chans,
            hidden_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        # Point-wise (kernel size 1) projection back to ``in_chans``; a
        # kernel of size 1 needs no padding.
        self.w_2 = torch.nn.Conv1d(hidden_chans, in_chans, 1, stride=1, padding=0)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Calculate forward propagation.

        Args:
            x (Tensor): Batch of input tensors (B, T, in_chans).

        Returns:
            Tensor: Batch of output tensors (B, T, in_chans).

        """
        # Conv1d expects channels on axis 1, so swap the channel and time axes
        # around each convolution and swap them back afterwards.
        x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
        return self.w_2(self.dropout(x).transpose(-1, 1)).transpose(-1, 1)

class MultiLayeredConv1d(torch.nn.Module):
    """Multi-layered conv1d for Transformer block.

    This is a module of multi-layered conv1d designed to replace the
    position-wise feed-forward network in a Transformer block, which is
    introduced in `FastSpeech: Fast, Robust and Controllable Text to Speech`_.

    Args:
        in_chans (int): Number of input channels.
        hidden_chans (int): Number of hidden channels.
        kernel_size (int): Kernel size of the first conv1d (default 5). The
            padding is ``(kernel_size - 1) // 2``, so an odd value keeps the
            time length unchanged while an even value shortens it by one frame.
        dropout_rate (float): Dropout rate (default 0.0).

    .. _`FastSpeech: Fast, Robust and Controllable Text to Speech`:
        https://arxiv.org/pdf/1905.09263.pdf

    """

    def __init__(
        self,
        in_chans: int,
        hidden_chans: int,
        kernel_size: int = 5,
        dropout_rate: float = 0.0,
    ):
        super().__init__()
        self.w_1 = torch.nn.Conv1d(
            in_chans,
            hidden_chans,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
        )
        # Point-wise (kernel size 1) projection back to ``in_chans``; a
        # kernel of size 1 needs no padding.
        self.w_2 = torch.nn.Conv1d(hidden_chans, in_chans, 1, stride=1, padding=0)
        self.dropout = torch.nn.Dropout(dropout_rate)

    def forward(self, x: torch.Tensor) -> torch.Tensor:
        """Calculate forward propagation.

        Args:
            x (Tensor): Batch of input tensors (B, T, in_chans).

        Returns:
            Tensor: Batch of output tensors (B, T, in_chans).

        """
        # Conv1d expects channels on axis 1, so swap the channel and time axes
        # around each convolution and swap them back afterwards.
        x = torch.relu(self.w_1(x.transpose(-1, 1))).transpose(-1, 1)
        return self.w_2(self.dropout(x).transpose(-1, 1)).transpose(-1, 1)

class Swish(torch.nn.Module):
    """Swish activation function for Conformer: ``x * sigmoid(x)``."""

    def forward(self, x):
        """Apply the Swish activation element-wise and return the result."""
        gate = torch.sigmoid(x)
        return x * gate
class ConvolutionModule(nn.Module):
    """ConvolutionModule in Conformer model.

    The input goes through a point-wise conv that doubles the channels, a GLU,
    a depth-wise conv, GroupNorm, the activation, and a final point-wise conv.
    The first ``ignore_prefix_len`` time steps bypass the depth-wise conv,
    the normalization and the activation.

    Args:
        channels (int): The number of channels of conv layers. The GroupNorm
            layer is built with 32 groups, so this must be divisible by 32.
        kernel_size (int): Kernel size of the depth-wise conv layer; must be
            odd so that the padding preserves the time length.
        activation (nn.Module, optional): Activation applied after the
            normalization. ``None`` (the default) creates a new ``Swish()``.
        ignore_prefix_len (int): Number of leading time steps that skip the
            depth-wise conv, normalization and activation.
        bias (bool): Whether the conv layers use a bias term.

    """

    def __init__(self, channels, kernel_size, activation=None, ignore_prefix_len=0, bias=True):
        super().__init__()
        # kernel_size should be an odd number for 'SAME' padding.
        # Kept as an assert so the raised exception type stays AssertionError.
        assert (kernel_size - 1) % 2 == 0, "kernel_size must be an odd number"

        self.pointwise_conv1 = nn.Conv1d(
            channels, 2 * channels, kernel_size=1, stride=1, padding=0, bias=bias
        )
        self.depthwise_conv = nn.Conv1d(
            channels,
            channels,
            kernel_size,
            stride=1,
            padding=(kernel_size - 1) // 2,
            groups=channels,
            bias=bias,
        )
        self.norm = nn.GroupNorm(num_groups=32, num_channels=channels)
        self.pointwise_conv2 = nn.Conv1d(
            channels, channels, kernel_size=1, stride=1, padding=0, bias=bias
        )
        # Build the default activation per instance: a module object used as a
        # default argument is created once at definition time and then shared
        # by every ConvolutionModule.
        self.activation = Swish() if activation is None else activation
        self.ignore_prefix_len = ignore_prefix_len

    def forward(self, x):
        """Compute convolution module.

        Args:
            x (torch.Tensor): Input tensor (#batch, time, channels).

        Returns:
            torch.Tensor: Output tensor (#batch, time, channels).

        """
        # exchange the temporal dimension and the feature dimension
        x = x.transpose(1, 2)  # (batch, channels, time)

        # GLU mechanism
        x = self.pointwise_conv1(x)  # (batch, 2 * channels, time)
        x = nn.functional.glu(x, dim=1)  # (batch, channels, time)

        # 1D depth-wise conv, norm and activation on the frames after the
        # prefix only; the prefix frames are passed through unchanged.
        x_pre = x[..., :self.ignore_prefix_len]
        x_sub = self.depthwise_conv(x[..., self.ignore_prefix_len:])
        x_sub = self.activation(self.norm(x_sub))
        x = torch.cat([x_pre, x_sub], dim=-1)

        x = self.pointwise_conv2(x)

        return x.transpose(1, 2)