Source code for supar.modules.affine

# -*- coding: utf-8 -*-

from __future__ import annotations

from typing import Callable, Optional

import torch
import torch.nn as nn
from supar.modules.mlp import MLP


class Biaffine(nn.Module):
    r"""
    Biaffine layer for first-order scoring :cite:`dozat-etal-2017-biaffine`.

    This layer has a tensor of weights :math:`W` and bias terms if needed.
    The score :math:`s(x, y)` of the vector pair :math:`(x, y)` is computed as :math:`x^T W y / d^s`,
    where :math:`d` and :math:`s` are the vector dimension and scaling factor respectively.
    :math:`x` and :math:`y` can be concatenated with bias terms.

    Args:
        n_in (int):
            The size of the input feature.
        n_out (int):
            The number of output channels.
        n_proj (Optional[int]):
            If specified, applies MLP layers to reduce vector dimensions. Default: ``None``.
        dropout (Optional[float]):
            If specified, applies a :class:`SharedDropout` layer with the ratio on MLP outputs. Default: 0.
        scale (int):
            Factor to scale the scores. Default: 0.
        bias_x (bool):
            If ``True``, adds a bias term for tensor :math:`x`. Default: ``True``.
        bias_y (bool):
            If ``True``, adds a bias term for tensor :math:`y`. Default: ``True``.
        decompose (bool):
            If ``True``, represents the weight as the product of 2 independent matrices. Default: ``False``.
        init (Callable):
            Callable initialization method. Default: `nn.init.zeros_`.
    """

    def __init__(
        self,
        n_in: int,
        n_out: int = 1,
        n_proj: Optional[int] = None,
        dropout: Optional[float] = 0,
        scale: int = 0,
        bias_x: bool = True,
        bias_y: bool = True,
        decompose: bool = False,
        init: Callable = nn.init.zeros_
    ) -> Biaffine:
        super().__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.n_proj = n_proj
        self.dropout = dropout
        self.scale = scale
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.decompose = decompose
        self.init = init

        if n_proj is not None:
            self.mlp_x, self.mlp_y = MLP(n_in, n_proj, dropout), MLP(n_in, n_proj, dropout)
        self.n_model = n_proj or n_in
        if not decompose:
            self.weight = nn.Parameter(torch.Tensor(n_out, self.n_model + bias_x, self.n_model + bias_y))
        else:
            self.weight = nn.ParameterList((nn.Parameter(torch.Tensor(n_out, self.n_model + bias_x)),
                                            nn.Parameter(torch.Tensor(n_out, self.n_model + bias_y))))

        self.reset_parameters()

    def __repr__(self):
        s = f"n_in={self.n_in}"
        if self.n_out > 1:
            s += f", n_out={self.n_out}"
        if self.n_proj is not None:
            s += f", n_proj={self.n_proj}"
        if self.dropout > 0:
            s += f", dropout={self.dropout}"
        if self.scale != 0:
            s += f", scale={self.scale}"
        if self.bias_x:
            s += f", bias_x={self.bias_x}"
        if self.bias_y:
            s += f", bias_y={self.bias_y}"
        if self.decompose:
            s += f", decompose={self.decompose}"
        return f"{self.__class__.__name__}({s})"

    def reset_parameters(self):
        if self.decompose:
            for i in self.weight:
                self.init(i)
        else:
            self.init(self.weight)
    def forward(
        self,
        x: torch.Tensor,
        y: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Args:
            x (torch.Tensor): ``[batch_size, seq_len, n_in]``.
            y (torch.Tensor): ``[batch_size, seq_len, n_in]``.

        Returns:
            ~torch.Tensor:
                A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len]``.
                If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically.
        """

        if hasattr(self, 'mlp_x'):
            x, y = self.mlp_x(x), self.mlp_y(y)
        if self.bias_x:
            x = torch.cat((x, torch.ones_like(x[..., :1])), -1)
        if self.bias_y:
            y = torch.cat((y, torch.ones_like(y[..., :1])), -1)
        # [batch_size, n_out, seq_len, seq_len]
        if self.decompose:
            wx = torch.einsum('bxi,oi->box', x, self.weight[0])
            wy = torch.einsum('byj,oj->boy', y, self.weight[1])
            s = torch.einsum('box,boy->boxy', wx, wy)
        else:
            s = torch.einsum('bxi,oij,byj->boxy', x, self.weight, y)
        return s.squeeze(1) / self.n_in ** self.scale
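
# --- Usage sketch (documentation aid, not part of the module source) ---
# A minimal example of scoring token pairs with `Biaffine`, run from outside
# this module. The shapes, label count, and the arc/label configuration below
# are illustrative assumptions, loosely following the common biaffine-parser
# setup; `reset_parameters` zero-initializes the weights by default, so the
# scores here are all zeros until training.
#
#     import torch
#     from supar.modules.affine import Biaffine
#
#     batch_size, seq_len, n_in = 2, 10, 500
#     x = torch.randn(batch_size, seq_len, n_in)  # e.g. dependent representations
#     y = torch.randn(batch_size, seq_len, n_in)  # e.g. head representations
#
#     # Arc scorer: with n_out=1, the channel dimension is squeezed away.
#     arc_attn = Biaffine(n_in=n_in, n_out=1, bias_x=True, bias_y=False)
#     s_arc = arc_attn(x, y)
#     assert s_arc.shape == (batch_size, seq_len, seq_len)
#
#     # Label scorer: n_out = number of relation labels (40 is arbitrary here).
#     rel_attn = Biaffine(n_in=n_in, n_out=40, bias_x=True, bias_y=True)
#     s_rel = rel_attn(x, y)
#     assert s_rel.shape == (batch_size, 40, seq_len, seq_len)
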
class Triaffine(nn.Module):
    r"""
    Triaffine layer for second-order scoring :cite:`zhang-etal-2020-efficient,wang-etal-2019-second`.

    This layer has a tensor of weights :math:`W` and bias terms if needed.
    The score :math:`s(x, y, z)` of the vector triple :math:`(x, y, z)` is computed as :math:`x^T z^T W y / d^s`,
    where :math:`d` and :math:`s` are the vector dimension and scaling factor respectively.
    :math:`x` and :math:`y` can be concatenated with bias terms.

    Args:
        n_in (int):
            The size of the input feature.
        n_out (int):
            The number of output channels.
        n_proj (Optional[int]):
            If specified, applies MLP layers to reduce vector dimensions. Default: ``None``.
        dropout (Optional[float]):
            If specified, applies a :class:`SharedDropout` layer with the ratio on MLP outputs. Default: 0.
        scale (int):
            Factor to scale the scores. Default: 0.
        bias_x (bool):
            If ``True``, adds a bias term for tensor :math:`x`. Default: ``False``.
        bias_y (bool):
            If ``True``, adds a bias term for tensor :math:`y`. Default: ``False``.
        decompose (bool):
            If ``True``, represents the weight as the product of 3 independent matrices. Default: ``False``.
        init (Callable):
            Callable initialization method. Default: `nn.init.zeros_`.
    """

    def __init__(
        self,
        n_in: int,
        n_out: int = 1,
        n_proj: Optional[int] = None,
        dropout: Optional[float] = 0,
        scale: int = 0,
        bias_x: bool = False,
        bias_y: bool = False,
        decompose: bool = False,
        init: Callable = nn.init.zeros_
    ) -> Triaffine:
        super().__init__()

        self.n_in = n_in
        self.n_out = n_out
        self.n_proj = n_proj
        self.dropout = dropout
        self.scale = scale
        self.bias_x = bias_x
        self.bias_y = bias_y
        self.decompose = decompose
        self.init = init

        if n_proj is not None:
            self.mlp_x = MLP(n_in, n_proj, dropout)
            self.mlp_y = MLP(n_in, n_proj, dropout)
            self.mlp_z = MLP(n_in, n_proj, dropout)
        self.n_model = n_proj or n_in
        if not decompose:
            self.weight = nn.Parameter(torch.Tensor(n_out, self.n_model + bias_x, self.n_model, self.n_model + bias_y))
        else:
            self.weight = nn.ParameterList((nn.Parameter(torch.Tensor(n_out, self.n_model + bias_x)),
                                            nn.Parameter(torch.Tensor(n_out, self.n_model)),
                                            nn.Parameter(torch.Tensor(n_out, self.n_model + bias_y))))

        self.reset_parameters()

    def __repr__(self):
        s = f"n_in={self.n_in}"
        if self.n_out > 1:
            s += f", n_out={self.n_out}"
        if self.n_proj is not None:
            s += f", n_proj={self.n_proj}"
        if self.dropout > 0:
            s += f", dropout={self.dropout}"
        if self.scale != 0:
            s += f", scale={self.scale}"
        if self.bias_x:
            s += f", bias_x={self.bias_x}"
        if self.bias_y:
            s += f", bias_y={self.bias_y}"
        if self.decompose:
            s += f", decompose={self.decompose}"
        return f"{self.__class__.__name__}({s})"

    def reset_parameters(self):
        if self.decompose:
            for i in self.weight:
                self.init(i)
        else:
            self.init(self.weight)
    def forward(
        self,
        x: torch.Tensor,
        y: torch.Tensor,
        z: torch.Tensor
    ) -> torch.Tensor:
        r"""
        Args:
            x (torch.Tensor): ``[batch_size, seq_len, n_in]``.
            y (torch.Tensor): ``[batch_size, seq_len, n_in]``.
            z (torch.Tensor): ``[batch_size, seq_len, n_in]``.

        Returns:
            ~torch.Tensor:
                A scoring tensor of shape ``[batch_size, n_out, seq_len, seq_len, seq_len]``.
                If ``n_out=1``, the dimension for ``n_out`` will be squeezed automatically.
        """

        if hasattr(self, 'mlp_x'):
            # project each input with its own MLP (fixed: z is projected by mlp_z, not y)
            x, y, z = self.mlp_x(x), self.mlp_y(y), self.mlp_z(z)
        if self.bias_x:
            x = torch.cat((x, torch.ones_like(x[..., :1])), -1)
        if self.bias_y:
            y = torch.cat((y, torch.ones_like(y[..., :1])), -1)
        # [batch_size, n_out, seq_len, seq_len, seq_len]
        if self.decompose:
            wx = torch.einsum('bxi,oi->box', x, self.weight[0])
            wz = torch.einsum('bzk,ok->boz', z, self.weight[1])
            wy = torch.einsum('byj,oj->boy', y, self.weight[2])
            s = torch.einsum('box,boz,boy->bozxy', wx, wz, wy)
        else:
            w = torch.einsum('bzk,oikj->bozij', z, self.weight)
            s = torch.einsum('bxi,bozij,byj->bozxy', x, w, y)
        return s.squeeze(1) / self.n_in ** self.scale
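
# --- Usage sketch (documentation aid, not part of the module source) ---
# A minimal example of second-order scoring with `Triaffine`, run from outside
# this module. The shapes and the sibling-style configuration are illustrative
# assumptions only; as with `Biaffine`, the default zero initialization yields
# all-zero scores before training.
#
#     import torch
#     from supar.modules.affine import Triaffine
#
#     batch_size, seq_len, n_in = 2, 10, 100
#     x = torch.randn(batch_size, seq_len, n_in)
#     y = torch.randn(batch_size, seq_len, n_in)
#     z = torch.randn(batch_size, seq_len, n_in)
#
#     # Scores every (x, y, z) triple; with n_out=1 the channel dim is squeezed,
#     # leaving one score per (z, x, y) index combination.
#     sib_attn = Triaffine(n_in=n_in, bias_x=True, bias_y=True)
#     s_sib = sib_attn(x, y, z)
#     assert s_sib.shape == (batch_size, seq_len, seq_len, seq_len)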