diff --git a/sockeye/layers.py b/sockeye/layers.py index a9b230075..4664e54b4 100644 --- a/sockeye/layers.py +++ b/sockeye/layers.py @@ -639,7 +639,7 @@ def forward(self, queries: pt.Tensor, key_values: pt.Tensor, mask: Optional[pt.Tensor] = None, - projected_memory_kv: Optional[pt.Tensor] = None) -> pt.Tensor: # mypy: ignore + projected_memory_kv: Optional[pt.Tensor] = None) -> pt.Tensor: """ Computes multi-head attention for queries given a memory tensor. If sequence lengths are provided, they will be used to mask the attention scores.