Spaces:
Running
Running
File size: 3,656 Bytes
aad5337 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 |
# https://github.com/joonson/syncnet_python/blob/master/SyncNetModel.py
import torch
import torch.nn as nn
def save(model, filename):
    """Write ``model`` to ``filename`` with ``torch.save`` and announce it.

    Args:
        model: Object handed to ``torch.save`` as-is.
        filename: Target passed to ``open(..., "wb")``.

    Prints ``"<filename> saved."`` on stdout once the object has been written.
    """
    with open(filename, "wb") as sink:
        torch.save(model, sink)
        notice = "%s saved." % filename
        print(notice)
def load(filename, map_location=None, **load_kwargs):
    """Deserialize an object from ``filename`` via ``torch.load``.

    Args:
        filename: Path or file-like object forwarded to ``torch.load``.
        map_location: Forwarded to ``torch.load``. Passing e.g. ``"cpu"``
            lets a checkpoint whose tensors were stored from another device
            be read on a machine that lacks it. The default ``None`` keeps
            the call equivalent to ``torch.load(filename)``.
        **load_kwargs: Any further keyword arguments, forwarded unchanged to
            ``torch.load`` (for example ``weights_only``).

    Returns:
        The object that was stored in the file.
    """
    # SECURITY: torch.load is pickle-based, so a crafted file can execute
    # arbitrary code while being read. Only load files from trusted sources.
    # NOTE(review): the torch.load documentation states that from PyTorch 2.6
    # the default is weights_only=True, which rejects fully pickled modules
    # such as the ones `save` in this file writes; for trusted files callers
    # can pass weights_only=False -- confirm against the installed version.
    return torch.load(filename, map_location=map_location, **load_kwargs)
class S(nn.Module):
    """Two-branch network with an audio stream and a lip (video) stream.

    Each stream is a convolutional stack followed by a fully connected head:

    * ``netcnnaud`` (2-D convolutions, 1 input channel) -> ``netfcaud``
    * ``netcnnlip`` (3-D convolutions, 3 input channels) -> ``netfclip``

    Both heads start with ``nn.Linear(512, 512)``, so the output of each
    convolutional stack must flatten to exactly 512 values per sample.

    The class defines no ``forward``; use ``forward_aud``, ``forward_lip`` or
    ``forward_lipfeat`` directly.

    Args:
        num_layers_in_fc_layers: Output width of the last ``nn.Linear`` in
            both fully connected heads.
    """

    def __init__(self, num_layers_in_fc_layers=1024):
        super().__init__()

        # Not read anywhere inside this class; retained so instances keep
        # exposing the same attributes.
        self.__nFeatures__ = 24
        self.__nChs__ = 32
        self.__midChs__ = 32

        # Creation order (audio CNN, audio head, lip head, lip CNN) is kept
        # as-is: it fixes the order of registered children and the order in
        # which random initialisation is drawn.
        self.netcnnaud = self._make_audio_cnn()
        self.netfcaud = self._make_fc_head(num_layers_in_fc_layers)
        self.netfclip = self._make_fc_head(num_layers_in_fc_layers)
        self.netcnnlip = self._make_lip_cnn()

    @staticmethod
    def _make_audio_cnn():
        """Build the 2-D convolutional stack of the audio stream."""
        k3 = (3, 3)
        same = (1, 1)
        layers = [
            nn.Conv2d(1, 64, kernel_size=k3, stride=(1, 1), padding=same),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            # 1x1 window with stride 1: kept so the positions of the
            # following layers inside the Sequential do not shift.
            nn.MaxPool2d(kernel_size=(1, 1), stride=(1, 1)),

            nn.Conv2d(64, 192, kernel_size=k3, stride=(1, 1), padding=same),
            nn.BatchNorm2d(192),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=k3, stride=(1, 2)),

            nn.Conv2d(192, 384, kernel_size=k3, padding=same),
            nn.BatchNorm2d(384),
            nn.ReLU(inplace=True),

            nn.Conv2d(384, 256, kernel_size=k3, padding=same),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),

            nn.Conv2d(256, 256, kernel_size=k3, padding=same),
            nn.BatchNorm2d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=k3, stride=(2, 2)),

            # Unpadded 5x4 convolution up to 512 channels.
            nn.Conv2d(256, 512, kernel_size=(5, 4), padding=(0, 0)),
            nn.BatchNorm2d(512),
            nn.ReLU(),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _make_lip_cnn():
        """Build the 3-D convolutional stack of the lip stream."""
        k133 = (1, 3, 3)
        s122 = (1, 2, 2)
        p011 = (0, 1, 1)
        layers = [
            # The only layer whose kernel extends (by 5) along the first of
            # the three convolved axes; every later kernel has size 1 there.
            nn.Conv3d(3, 96, kernel_size=(5, 7, 7), stride=s122, padding=0),
            nn.BatchNorm3d(96),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=k133, stride=s122),

            nn.Conv3d(96, 256, kernel_size=(1, 5, 5), stride=s122, padding=p011),
            nn.BatchNorm3d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=k133, stride=s122, padding=p011),

            nn.Conv3d(256, 256, kernel_size=k133, padding=p011),
            nn.BatchNorm3d(256),
            nn.ReLU(inplace=True),

            nn.Conv3d(256, 256, kernel_size=k133, padding=p011),
            nn.BatchNorm3d(256),
            nn.ReLU(inplace=True),

            nn.Conv3d(256, 256, kernel_size=k133, padding=p011),
            nn.BatchNorm3d(256),
            nn.ReLU(inplace=True),
            nn.MaxPool3d(kernel_size=k133, stride=s122),

            # Unpadded 1x6x6 convolution up to 512 channels.
            nn.Conv3d(256, 512, kernel_size=(1, 6, 6), padding=0),
            nn.BatchNorm3d(512),
            nn.ReLU(inplace=True),
        ]
        return nn.Sequential(*layers)

    @staticmethod
    def _make_fc_head(out_features):
        """Build a 512 -> 512 -> ``out_features`` fully connected head."""
        return nn.Sequential(
            nn.Linear(512, 512),
            nn.BatchNorm1d(512),
            nn.ReLU(),
            nn.Linear(512, out_features),
        )

    @staticmethod
    def _flatten_per_sample(feats):
        """Reshape ``feats`` to ``(feats.size(0), -1)`` using ``view``."""
        return feats.view(feats.size(0), -1)

    def forward_aud(self, x):
        """Run ``x`` through the audio CNN and the audio head.

        The CNN output is flattened per sample before ``netfcaud``, whose
        first layer expects 512 features.
        """
        conv_out = self.netcnnaud(x)
        flat = self._flatten_per_sample(conv_out)
        return self.netfcaud(flat)

    def forward_lip(self, x):
        """Run ``x`` through the lip CNN and the lip head.

        The CNN output is flattened per sample before ``netfclip``, whose
        first layer expects 512 features.
        """
        conv_out = self.netcnnlip(x)
        flat = self._flatten_per_sample(conv_out)
        return self.netfclip(flat)

    def forward_lipfeat(self, x):
        """Return the flattened lip CNN output, skipping the lip head."""
        conv_out = self.netcnnlip(x)
        return self._flatten_per_sample(conv_out)
|