add test and numpy/tf implementation for slice_nd

mikel-zhobro · mikel-zhobro · commit e432a2844a3a · 2021-06-20T16:12:29.000+02:00
diff --git a/returnn/tf/util/basic.py b/returnn/tf/util/basic.py
@@ -3631,6 +3631,33 @@ def windowed_nd(source, window_size, window_left=None, window_right=None,
     return final
 
 
+def slice_nd2(x, start, size):
+  """
+  Generic slice function: x and start may share arbitrarily many leading (common) axes.
+  We assume that the axes of x and start are laid out in the same order.
+
+  :param tf.Tensor x: shape (B, T1, ..., Tn, D)
+  :param tf.Tensor start: shape (B, T1, ..., Tn-1), int32. Its rank determines the slice axis of x:
+    the axis right after the common axes is sliced (there is no separate axis argument).
+  :param int|tf.Tensor size: scalar, length of each slice
+  :return: ret[b, t1, .., tn-1, 0..size, :] = x[b, t1, .., tn-1, start[b, t1, .., tn-1]+0..size, :]
+    Where a slice goes out of bounds of the slice axis, the result is padded with zeros.
+  :rtype: tf.Tensor
+  """
+  with tf.name_scope("slice_nd"):
+    shape = x.shape
+    len_common_dims = len(start.shape)  # nr of common dims, also the index of the slice axis in x
+    slice_dim = shape[len_common_dims]  # dim of axis to be sliced (taken from the static shape)
+    assert size < slice_dim, "Slice size cannot be bigger than the dimension to be sliced."  # NOTE(review): strict "<" also rejects size == slice_dim
+    # Create indices for the slices where slice_idx[B,T1 .., Tn-1] = start[B,T1 .., Tn-1] + range(size)
+    slice_idx = tf.tile(tf.expand_dims(start, -1), [1] * len_common_dims + [size]) + tf.range(size)  # (B,T1 .., Tn-1, size)
+    mask = tf.logical_or(tf.greater(slice_idx, slice_dim - 1), tf.less(slice_idx, 0))  # True where out of bounds, (B,T1 .., Tn-1, size)
+    slice_idx = tf.clip_by_value(slice_idx, 0, slice_dim - 1)  # clipped into [0, slice_dim - 1]; out-of-bounds entries are handled via mask
+    res = tf.gather(x, slice_idx, axis=len_common_dims, batch_dims=len_common_dims)  # presumably (B,T1 .., Tn-1, size, D) -- confirm
+    res = where_bc(mask, tf.zeros_like(res), res)  # zero-padding of the out-of-bounds positions
+    return res
+
+
 def slice_nd(x, start, size):
   """
   :param tf.Tensor x: shape (B, T, ...)
diff --git a/tests/test_TFUtil.py b/tests/test_TFUtil.py
@@ -1637,6 +1637,8 @@ def test_windowed_nd_big():
 
 
 def naive_slice_nd(x, start, size):
+  # old implementation, check out naive_slice_nd2
+
   slices_shape = [x.shape[0], size] + list(x.shape)[2:]
   ys = numpy.zeros(shape=slices_shape)
   for i in range(len(start)):
@@ -1653,6 +1655,63 @@ def naive_slice_nd(x, start, size):
   return ys
 
 
+def naive_slice_nd2(x, start, size):
+  # Naive numpy reference for slice_nd2. Assumes x: [B, T1, T2, .., Tn, D] and start: [B, T1, .., Tn-1],
+  # i.e. the leading dimensions of x and start are in the same order and the next axis of x is sliced.
+  # (Otherwise we would need the slice axis as an extra argument.)
+
+  len_common_dims = len(start.shape)  # number of leading dims shared by x and start
+  slice_shape = (size,) + x.shape[len_common_dims+1:]  # shape of one slice: size plus the remaining dims of x
+  result_shape = start.shape[0:len_common_dims] + slice_shape  # shape of output
+  result = numpy.zeros(result_shape)
+
+  slice_axis_dim = x.shape[len_common_dims]  # dim of axis being sliced
+  for index, start_position in numpy.ndenumerate(start):  # NOTE(review): assumes start_position >= 0 -- confirm
+    end_position = min(start_position+size, slice_axis_dim)  # clip the slice end to the length of the slice axis
+
+    # default: no padding on any axis of the slice
+    padding = ((0,0),)
+    for i in range(1, len(slice_shape)):
+        padding += ((0, 0),)
+
+    # if the slice was cut short, replace the first padding tuple (slice axis) to pad the missing tail entries
+    if end_position < start_position+size:
+      padding = ((0,size - end_position + start_position),) + padding[1:]
+    result[index] = numpy.pad(x[index][start_position:end_position], padding, mode='constant', constant_values=0)
+  return result
+
+
+def test_slice_nd_multi_dim():  # slice_nd2 with two common axes (batch, time_1), slicing along time_2
+  n_batch = 2
+  n_time_1 = 2
+  n_time_2 = 3  # slice axis
+  n_dim = 2
+  size = 2
+  source = numpy.arange(24, dtype=numpy.float32).reshape(n_batch, n_time_1, n_time_2, n_dim).astype("float32")
+  start = numpy.array([[0,1],[1,2]]).astype("int32")  # (n_batch, n_time_1); start=2 with size=2 exceeds n_time_2
+  naive = naive_slice_nd2(source, start, size)  # numpy reference result
+  source_tf = tf.constant(source)
+  real = slice_nd2(source_tf, start=start, size=size).eval()  # TF result
+  print("source:")
+  print(source)
+  print("naive:")
+  print(naive)
+  print("real:")
+  print(real)
+  expected_output = numpy.array(
+    [[[[0, 1],
+       [2, 3]],
+      [[8, 9],
+       [10, 11]]],
+
+     [[[14, 15],
+       [16, 17]],
+      [[22, 23],
+       [0, 0]]]])  # zero padding: the slice starting at 2 with size 2 runs past n_time_2 = 3
+  numpy.testing.assert_almost_equal(naive, expected_output)
+  numpy.testing.assert_almost_equal(real, expected_output)
+
+
 def test_slice_nd_small():
   n_batch = 3
   n_time = 4
@@ -1662,7 +1721,7 @@ def test_slice_nd_small():
   source = numpy.arange(1, n_batch*n_time*n_dim + 1, dtype=numpy.float32).reshape(n_batch, n_time, n_dim).astype("float32")
   source_tf = tf.constant(source)
   naive = naive_slice_nd(source, start, size)
-  real = slice_nd(source_tf, start=start, size=size).eval()
+  real = slice_nd2(source_tf, start=start, size=size).eval()
   print("source:")
   print(source)
   print("naive:")
@@ -1682,7 +1741,7 @@ def test_slice_nd_big():
   source = numpy.random.random((n_batch, n_time, n_dim)).astype("float32")
   source_tf = tf.constant(source)
   naive = naive_slice_nd(source, start, size)
-  real = slice_nd(source_tf, start=start, size=size).eval()
+  real = slice_nd2(source_tf, start=start, size=size).eval()
   print("source:")
   print(source)
   print("naive:")