@@ -6836,79 +6836,6 @@ def make_layer_dict(self,
       **self.get_opts()}
 
 
-class RelativePositionalEncoding(_Base):
-  """
-  Relative positioning term as introduced by Shaw et al., 2018
-
-  Usually added to Self-Attention using key_shift.
-  Parts of the code are adapted from Tensor2Tensor (https://github.com/tensorflow/tensor2tensor).
-
-  Example usage::
-
-      d[output + '_rel_pos'] = {"class": "relative_positional_encoding",
-                                "from": [output + '_self_att_laynorm'],
-                                "n_out": self.EncKeyTotalDim // self.AttNumHeads,
-                                "forward_weights_init": self.ff_init}
-      d[output + '_self_att_att'] = {"class": "self_attention",
-                                     "num_heads": self.AttNumHeads,
-                                     "total_key_dim": self.EncKeyTotalDim,
-                                     "n_out": self.EncValueTotalDim, "from": [output + '_self_att_laynorm'],
-                                     "attention_left_only": False, "attention_dropout": self.attention_dropout,
-                                     "forward_weights_init": self.ff_init,
-                                     "key_shift": output + '_rel_pos'}
-
-  """
-  returnn_layer_class = 'relative_positional_encoding'
-  has_recurrent_state = False
-  has_variables = True
-
-  # noinspection PyShadowingBuiltins,PyShadowingNames
-  def __init__(self,
-               n_out: int,
-               *,
-               forward_weights_init: str = NotSpecified,
-               clipping: int = NotSpecified,
-               fixed: bool = NotSpecified,
-               **kwargs):
-    """
-    :param int n_out: Feature dimension of encoding.
-    :param str forward_weights_init: see :func:`returnn.tf.util.basic.get_initializer`
-    :param int clipping: After which distance to fallback to the last encoding
-    :param bool fixed: Uses sinusoid positional encoding instead of learned parameters
-    """
-    super().__init__(**kwargs)
-    self.n_out = n_out
-    self.forward_weights_init = forward_weights_init
-    self.clipping = clipping
-    self.fixed = fixed
-
-  def get_opts(self):
-    """
-    Return all options
-    """
-    opts = {
-      'n_out': self.n_out,
-      'forward_weights_init': self.forward_weights_init,
-      'clipping': self.clipping,
-      'fixed': self.fixed,
-    }
-    opts = {key: value for (key, value) in opts.items() if value is not NotSpecified}
-    return {**opts, **super().get_opts()}
-
-  # noinspection PyShadowingBuiltins,PyShadowingNames
-  def make_layer_dict(self,
-                      source: LayerRef,
-                      ) -> LayerDictRaw:
-    """
-    Make layer dict
-    """
-    assert isinstance(source, LayerRef)
-    return {
-      'class': 'relative_positional_encoding',
-      'from': source,
-      **self.get_opts()}
-
-
 class _CumConcat(_Base):
   """
   Concatenates all previous frames of a time-axis.