# sentiment-analysis / layer.py
import numpy as np
import torch
from torch import nn
import torch.nn.functional as F
from torch.nn.utils.rnn import pack_padded_sequence, pad_packed_sequence


class DynamicLayerConfig:
"""
Arguments for nn.Embedding layer:
vocab_size - size of the vocabulary (number of unique tokens, depends on tokenizer configuration)
embed_size - the number of features to represent one token
Arguments for LSTM layer:
hidden_size – the number of features in the hidden state
    proj_size – if > 0, an LSTM with projections of that size is used (the LSTM then outputs proj_size
                features per direction instead of hidden_size)
    num_layers – number of recurrent layers
    dropout – if non-zero, introduces a Dropout layer on the outputs of each LSTM layer except the last layer,
              with dropout probability equal to dropout
    bidirectional – if True, becomes a bidirectional LSTM
    See the illustrative instantiation after the class definition.
    """
def __init__(
self,
vocab_size: int,
embed_size: int,
hidden_size: int,
proj_size: int = 0,
num_layers: int = 1,
dropout: float = 0.,
bidirectional: bool = False
):
self.embed_size = embed_size
self.hidden_size = hidden_size
self.vocab_size = vocab_size
self.proj_size = proj_size
self.num_layers = num_layers
self.dropout = dropout
self.bidirectional = bidirectional
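

# A minimal illustration of how DynamicLayerConfig might be instantiated; the
# values below are illustrative assumptions, not settings used elsewhere in
# this repository:
#
#     config = DynamicLayerConfig(
#         vocab_size=30_000,    # must match the tokenizer's vocabulary size
#         embed_size=128,       # token embedding dimension
#         hidden_size=256,      # LSTM hidden state size
#         num_layers=2,
#         dropout=0.1,
#         bidirectional=True,   # attention and classifier sizes double automatically
#     )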


class DynamicLayerAttentionBlock(nn.Module):
    """Scaled dot-product self-attention over the LSTM outputs, pooled to a fixed-size vector."""
    def __init__(self, config):
        super().__init__()
        # The LSTM emits proj_size features per direction when projections are enabled,
        # otherwise hidden_size features per direction.
        self.input_size = config.proj_size if config.proj_size != 0 else config.hidden_size
        # Dimension of the attention projections (and of the pooled output vector).
        self.proj_size = config.proj_size if config.proj_size != 0 else config.embed_size
        if config.bidirectional:
            self.input_size *= 2
            self.proj_size *= 2
        self.W_Q = nn.Linear(self.input_size, self.proj_size, bias=False)
        self.W_K = nn.Linear(self.input_size, self.proj_size, bias=False)
        self.W_V = nn.Linear(self.input_size, self.proj_size, bias=False)

    def forward(self, rnn_output):
        # Project the LSTM outputs into query, key and value spaces.
        Q = self.W_Q(rnn_output)
        K = self.W_K(rnn_output)
        V = self.W_V(rnn_output)
        # Scaled dot-product attention: softmax(Q K^T / sqrt(d_k)) V.
        d_k = K.size(-1)
        scores = torch.matmul(Q, K.transpose(1, 2)) / np.sqrt(d_k)
        alpha_n = F.softmax(scores, dim=-1)
        context = torch.matmul(alpha_n, V)
        # Sum over the sequence dimension to obtain one fixed-size vector per example.
        output = context.sum(1)
        return output, alpha_n


class DynamicLayer(nn.Module):
    """Embeds token ids, encodes them with a (packed) LSTM and pools the outputs
    with attention into a fixed-size representation."""
def __init__(self, config: DynamicLayerConfig):
super().__init__()
self.config = config
self.wte = nn.Embedding(self.config.vocab_size, self.config.embed_size)
self.lstm = nn.LSTM(
input_size=self.config.embed_size,
hidden_size=self.config.hidden_size,
proj_size=self.config.proj_size,
num_layers=self.config.num_layers,
dropout=self.config.dropout,
bidirectional=self.config.bidirectional,
batch_first=True,
)
self.attention = DynamicLayerAttentionBlock(self.config)
"""
Arguments:
input_ids - tensor of shape (batch_size, sequence_length). All values are in interval - [0, vocab_size).
These indices will be processed through nn.Embedding to obtain inputs_embeds of shape (batch_size, sequence_length, embed_size)
or
inputs_embeds - tensor of shape (batch_size, sequence_length, embed_size)
"""
def forward(
self,
input_ids: torch.LongTensor,
input_lens: torch.LongTensor,
) -> torch.FloatTensor:
input_embeds = self.wte(input_ids)
input_packed = pack_padded_sequence(input_embeds, input_lens, batch_first=True, enforce_sorted=False)
lstm_output, (hn, cn) = self.lstm(input_packed)
output_padded, output_lengths = pad_packed_sequence(lstm_output, batch_first=True)
output, _ = self.attention(output_padded)
return output


class Model(nn.Module):
    """Binary sentiment classifier: DynamicLayer encoder followed by a linear head with a sigmoid."""
    def __init__(self, config: DynamicLayerConfig):
        super().__init__()
        # Size of the fixed-size vector produced by the attention block
        # (doubled when the LSTM is bidirectional).
        self.proj_size = config.proj_size if config.proj_size != 0 else config.embed_size
        if config.bidirectional:
            self.proj_size *= 2
        self.dynamic_layer = DynamicLayer(config)
        self.fc = nn.Linear(self.proj_size, 1)

    def forward(
        self,
        input_ids: torch.LongTensor,
        input_lens: torch.LongTensor,
    ) -> torch.FloatTensor:
        fixed_size = self.dynamic_layer(input_ids, input_lens)
        # Probability of the positive class for each example, shape (batch_size, 1).
        return torch.sigmoid(self.fc(fixed_size))
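

if __name__ == "__main__":
    # Minimal smoke-test sketch; the sizes and the random batch below are
    # illustrative assumptions, not hyperparameters or data from this repository.
    config = DynamicLayerConfig(vocab_size=1000, embed_size=32, hidden_size=64)
    model = Model(config)

    batch_size, max_len = 4, 12
    input_ids = torch.randint(0, config.vocab_size, (batch_size, max_len))
    # True (unpadded) lengths; pack_padded_sequence expects them on the CPU.
    input_lens = torch.tensor([12, 9, 7, 5])

    probs = model(input_ids, input_lens)
    print(probs.shape)  # expected: torch.Size([4, 1]), one probability per example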