@@ -6836,79 +6836,6 @@ def make_layer_dict(self,
       **self.get_opts()}
 
 
-class RelativePositionalEncoding(_Base):
-  """
-  Relative positioning term as introduced by Shaw et al., 2018
-
-  Usually added to Self-Attention using key_shift.
-  Parts of the code are adapted from Tensor2Tensor (https://github.com/tensorflow/tensor2tensor).
-
-  Example usage::
-
-      d[output + '_rel_pos'] = {"class": "relative_positional_encoding",
-                                "from": [output + '_self_att_laynorm'],
-                                "n_out": self.EncKeyTotalDim // self.AttNumHeads,
-                                "forward_weights_init": self.ff_init}
-      d[output + '_self_att_att'] = {"class": "self_attention",
-                                     "num_heads": self.AttNumHeads,
-                                     "total_key_dim": self.EncKeyTotalDim,
-                                     "n_out": self.EncValueTotalDim, "from": [output + '_self_att_laynorm'],
-                                     "attention_left_only": False, "attention_dropout": self.attention_dropout,
-                                     "forward_weights_init": self.ff_init,
-                                     "key_shift": output + '_rel_pos'}
-
-  """
-  returnn_layer_class = 'relative_positional_encoding'
-  has_recurrent_state = False
-  has_variables = True
-
-  # noinspection PyShadowingBuiltins,PyShadowingNames
-  def __init__(self,
-               n_out: int,
-               *,
-               forward_weights_init: str = NotSpecified,
-               clipping: int = NotSpecified,
-               fixed: bool = NotSpecified,
-               **kwargs):
-    """
-    :param int n_out: Feature dimension of encoding.
-    :param str forward_weights_init: see :func:`returnn.tf.util.basic.get_initializer`
-    :param int clipping: After which distance to fallback to the last encoding
-    :param bool fixed: Uses sinusoid positional encoding instead of learned parameters
-    """
-    super().__init__(**kwargs)
-    self.n_out = n_out
-    self.forward_weights_init = forward_weights_init
-    self.clipping = clipping
-    self.fixed = fixed
-
-  def get_opts(self):
-    """
-    Return all options
-    """
-    opts = {
-      'n_out': self.n_out,
-      'forward_weights_init': self.forward_weights_init,
-      'clipping': self.clipping,
-      'fixed': self.fixed,
-    }
-    opts = {key: value for (key, value) in opts.items() if value is not NotSpecified}
-    return {**opts, **super().get_opts()}
-
-  # noinspection PyShadowingBuiltins,PyShadowingNames
-  def make_layer_dict(self,
-                      source: LayerRef,
-                      ) -> LayerDictRaw:
-    """
-    Make layer dict
-    """
-    assert isinstance(source, LayerRef)
-    return {
-      'class': 'relative_positional_encoding',
-      'from': source,
-      **self.get_opts()}
-
-
 class _CumConcat(_Base):
   """
   Concatenates all previous frames of a time-axis.