@@ -15,8 +15,8 @@
 
 class FiLM(nn.Module):
     def __init__(self,
-                 cond_dim: int,  # dim of conditioning input
-                 num_features: int,  # dim of the conv channel
+                 cond_dim: int,  # Dim of conditioning input
+                 num_features: int,  # Dim of the conv channel
                  use_bn: bool) -> None:
         super().__init__()
         self.num_features = num_features
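
Note: for context on the class above, FiLM (feature-wise linear modulation, Perez et al. 2018) scales and shifts each conv channel with parameters predicted from the conditioning vector. The forward pass is outside this diff, so the sketch below only illustrates what a module with this signature typically computes; the single nn.Linear adaptor and the non-affine BatchNorm1d are assumptions, not this repo's exact implementation.

    import torch as tr
    from torch import Tensor, nn

    class FiLMSketch(nn.Module):
        def __init__(self, cond_dim: int, num_features: int, use_bn: bool) -> None:
            super().__init__()
            # Predict one (gamma, beta) pair per channel from the conditioning vector
            self.adaptor = nn.Linear(cond_dim, 2 * num_features)
            self.bn = nn.BatchNorm1d(num_features, affine=False) if use_bn else None

        def forward(self, x: Tensor, cond: Tensor) -> Tensor:
            g, b = self.adaptor(cond).chunk(2, dim=-1)  # each (bs, num_features)
            if self.bn is not None:
                x = self.bn(x)  # normalize so gamma and beta act as the affine params
            return g.unsqueeze(-1) * x + b.unsqueeze(-1)  # broadcast over time axis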
@@ -47,8 +47,8 @@ def __init__(self,
                  dilation: int = 1,
                  bias: bool = True,
                  padding_mode: str = "zeros",
-                 is_causal: bool = True,
-                 is_cached: bool = False,
+                 causal: bool = True,
+                 cached: bool = False,
                  use_dynamic_bs: bool = True,
                  batch_size: int = 1,
                  use_ln: bool = False,
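
Note: the is_causal/is_cached to causal/cached rename aligns the TCNBlock parameter names with the Conv1dGeneral keyword arguments used below. "Cached" refers to streaming convolution: the layer keeps the trailing (kernel_size - 1) * dilation input samples between calls so that block-by-block processing matches offline processing. Conv1dGeneral itself is not part of this diff, so the following is only a sketch of the idea for the causal case, with a fixed batch size of 1:

    import torch as tr
    from torch import Tensor, nn

    class CachedCausalConv1d(nn.Module):
        def __init__(self, ch: int, kernel_size: int, dilation: int = 1) -> None:
            super().__init__()
            self.pad_n = (kernel_size - 1) * dilation
            self.conv = nn.Conv1d(ch, ch, kernel_size, dilation=dilation)
            # Cached left context, initialized to silence (fixed batch size of 1)
            self.register_buffer("cache", tr.zeros((1, ch, self.pad_n)))

        def forward(self, x: Tensor) -> Tensor:
            x = tr.cat([self.cache, x], dim=-1)  # prepend cached left context
            self.cache = x[..., -self.pad_n:]  # keep the right edge for the next call
            return self.conv(x)  # output length equals input length

Feeding a signal in one call or in consecutive blocks then yields identical outputs, which is what makes real-time inference possible.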
@@ -85,8 +85,8 @@ def __init__(self,
                                   dilation=dilation,
                                   bias=bias,
                                   padding_mode=padding_mode,
-                                  causal=is_causal,
-                                  cached=is_cached,
+                                  causal=causal,
+                                  cached=cached,
                                   use_dynamic_bs=use_dynamic_bs,
                                   batch_size=batch_size,
                                   debug_mode=debug_mode)
@@ -151,7 +151,7 @@ def prepare_for_inference(self) -> None:
         """
         self.debug_mode = False
         self.conv.prepare_for_inference()
-        self.eval()  # TODO(cm): check if this is applied to all modules recursively
+        self.eval()
 
     def forward(self, x: Tensor, cond: Optional[Tensor] = None) -> Tensor:
         if self.debug_mode:
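
Note: dropping the TODO above is safe. nn.Module.eval() calls self.train(False), which recurses into all submodules, so the block's convolution, normalization, and FiLM layers are switched to eval mode as well.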
@@ -166,55 +166,63 @@ def forward(self, x: Tensor, cond: Optional[Tensor] = None) -> Tensor:
         if self.film is not None:
             if self.debug_mode:
                 assert cond is not None
-            x = self.film(x, cond)
+            if cond is not None:  # This if statement is needed for TorchScript
+                x = self.film(x, cond)
         if self.act is not None:
             x = self.act(x)
         if self.res is not None:
             res = self.res(x_in)
-            x_res = self.crop_fn(res, x.size(-1))  # TODO
+            right_offset = self.get_delay_samples()
+            x_res = Conv1dGeneral.right_offset_crop(res, x.size(-1), right_offset)
             x += x_res
         return x
 
 
 class TCN(nn.Module):
     def __init__(self,
+                 in_channels: int,
                  out_channels: List[int],
-                 dilations: Optional[List[int]] = None,
-                 in_ch: int = 1,
-                 kernel_size: int = 13,
+                 kernel_size: int = 3,
                  strides: Optional[List[int]] = None,
-                 padding: Optional[int] = 0,
+                 padding: Union[str, int, Tuple[int]] = "same",
+                 dilations: Optional[List[int]] = None,
+                 bias: bool = True,
+                 padding_mode: str = "zeros",
+                 causal: bool = True,
+                 cached: bool = False,
+                 use_dynamic_bs: bool = True,
+                 batch_size: int = 1,
                  use_ln: bool = False,
                  temporal_dims: Optional[List[int]] = None,
                  use_act: bool = True,
                  use_res: bool = True,
                  cond_dim: int = 0,
-                 use_film_bn: bool = False,
-                 is_causal: bool = True,
-                 is_cached: bool = False) -> None:
+                 use_film_bn: bool = True,  # TODO(cm): check if this should be false
+                 debug_mode: bool = True) -> None:
         super().__init__()
+        self.in_channels = in_channels
         self.out_channels = out_channels
-        self.in_ch = in_ch
-        self.out_ch = out_channels[-1]
         self.kernel_size = kernel_size
+        self.strides = strides
         self.padding = padding
+        self.dilations = dilations
+        self.bias = bias
+        self.padding_mode = padding_mode
+        self.causal = causal
+        self.cached = cached
+        self.use_dynamic_bs = use_dynamic_bs
+        self.batch_size = batch_size
         self.use_ln = use_ln
-        self.temporal_dims = temporal_dims  # TODO(cm): calculate automatically
+        self.temporal_dims = temporal_dims
         self.use_act = use_act
         self.use_res = use_res
         self.cond_dim = cond_dim
         self.use_film_bn = use_film_bn
-        self.is_causal = is_causal
-        self.is_cached = is_cached
-        if is_causal:
-            assert padding == 0, "If the TCN is causal, padding must be 0"
-            self.crop_fn = causal_crop
-        else:
-            self.crop_fn = center_crop
-        if is_cached:
-            assert is_causal, "If the TCN is streaming, it must be causal"
+        self.debug_mode = debug_mode
 
         self.n_blocks = len(out_channels)
+        assert self.n_blocks > 0
+
         if dilations is None:
             dilations = [4 ** idx for idx in range(self.n_blocks)]
             log.info(f"Setting dilations automatically to: {dilations}")
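
Note: the new "if cond is not None:" guard is a TorchScript type-refinement idiom. The compiler only narrows Optional[Tensor] to Tensor through an explicit None check in the enclosing control flow; the assert above it is nested under "if self.debug_mode:", so it cannot be used for refinement. A minimal repro of the pattern, with a hypothetical Gain module:

    import torch as tr
    from torch import Tensor, nn
    from typing import Optional

    class Gain(nn.Module):
        def forward(self, x: Tensor, g: Optional[Tensor] = None) -> Tensor:
            if g is not None:  # refines Optional[Tensor] to Tensor for the compiler
                x = x * g
            return x

    script = tr.jit.script(Gain())  # scripts cleanly thanks to the None check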
@@ -223,7 +231,7 @@ def __init__(self,
 
         if strides is None:
             strides = [1] * self.n_blocks
-        log.info(f"Setting strides automatically to: {strides}")
+            log.info(f"Setting strides automatically to: {strides}")
         assert len(strides) == self.n_blocks
         self.strides = strides
 
@@ -233,9 +241,11 @@
 
         self.blocks = nn.ModuleList()
         block_out_ch = None
-        for idx, (curr_out_ch, dil, stride) in enumerate(zip(out_channels, dilations, strides)):
+        for idx, (curr_out_ch, dil, stride) in enumerate(zip(out_channels,
+                                                             dilations,
+                                                             strides)):
             if idx == 0:
-                block_in_ch = in_ch
+                block_in_ch = in_channels
             else:
                 block_in_ch = block_out_ch
             block_out_ch = curr_out_ch
@@ -244,51 +254,115 @@ def __init__(self,
             if temporal_dims is not None:
                 temp_dim = temporal_dims[idx]
 
-            self.blocks.append(TCNBlock(
-                block_in_ch,
-                block_out_ch,
-                kernel_size,
-                dil,
-                stride,
-                padding,
-                use_ln,
-                temp_dim,
-                use_act,
-                use_res,
-                cond_dim,
-                use_film_bn,
-                is_causal,
-                is_cached
-            ))
+            self.blocks.append(TCNBlock(block_in_ch,
+                                        block_out_ch,
+                                        kernel_size,
+                                        stride,
+                                        padding,
+                                        dil,
+                                        bias,
+                                        padding_mode,
+                                        causal,
+                                        cached,
+                                        use_dynamic_bs,
+                                        batch_size,
+                                        use_ln,
+                                        temp_dim,
+                                        use_act,
+                                        use_res,
+                                        cond_dim,
+                                        use_film_bn,
+                                        debug_mode))
 
+    @tr.jit.export
     def is_conditional(self) -> bool:
+        """Returns True if the TCN is conditional, False otherwise."""
         return self.cond_dim > 0
 
-    def forward(self, x: Tensor, cond: Optional[Tensor] = None) -> Tensor:
-        assert x.ndim == 3  # (batch_size, in_ch, samples)
-        if self.is_conditional():
-            assert cond is not None
-            assert cond.shape == (x.size(0), self.cond_dim)  # (batch_size, cond_dim)
+    @tr.jit.export
+    def is_cached(self) -> bool:
+        """Returns True if the TCN is cached, False otherwise."""
+        return self.cached
+
+    @tr.jit.export
+    def set_cached(self, cached: bool) -> None:
+        """
+        Sets the TCN to cached or not cached mode and resets its state.
+
+        Args:
+            cached: If True, the TCN is cached. If False, it is not cached.
+        """
+        self.cached = cached
         for block in self.blocks:
-            x = block(x, cond)
-        return x
+            block.set_cached(cached)
+
+    @tr.jit.export
+    def reset(self, batch_size: Optional[int] = None) -> None:
+        """
+        Resets the TCN's state. If batch_size is provided, the cached padding
+        will be resized to match the new batch size.
+
+        Args:
+            batch_size: If provided, the cached padding will be resized to match the new
+                batch size.
+        """
+        for block in self.blocks:
+            block.reset(batch_size)
+
+    @tr.jit.export
+    def get_delay_samples(self) -> int:
+        """
+        Returns the number of samples that the TCN delays the output by. This
+        should always be 0 when the TCN is causal. This is ill-defined when not
+        in cached mode since the output number of samples can be different than the
+        input number of samples, so this would typically only be used in cached mode.
+        """
+        # TODO(cm): verify this
+        delay_samples = 0
+        for block in self.blocks:
+            delay_samples += block.get_delay_samples()
+        return delay_samples
 
+    @tr.jit.export
     def calc_receptive_field(self) -> int:
-        """Compute the receptive field in samples."""
+        """Computes the receptive field of the TCN in samples."""
         assert all(_ == 1 for _ in self.strides)  # TODO(cm): add support for dsTCN
         assert self.dilations[0] == 1  # TODO(cm): add support for >1 starting dilation
         rf = self.kernel_size
         for dil in self.dilations[1:]:
-            rf = rf + ((self.kernel_size - 1) * dil)
+            rf += ((self.kernel_size - 1) * dil)
         return rf
 
+    def prepare_for_inference(self) -> None:
+        """
+        Prepares the TCN for inference by disabling debug mode and ensuring the
+        TCN is in cached mode.
+        """
+        self.debug_mode = False
+        for block in self.blocks:
+            block.prepare_for_inference()
+        self.eval()
+
+    def forward(self, x: Tensor, cond: Optional[Tensor] = None) -> Tensor:
+        if self.debug_mode:
+            assert x.ndim == 3  # (bs, in_ch, samples)
+            if self.is_conditional():
+                assert cond is not None
+                assert cond.shape == (x.size(0), self.cond_dim)  # (bs, cond_dim)
+        for block in self.blocks:
+            x = block(x, cond)
+        return x
+
 
 if __name__ == '__main__':
     out_channels = [8] * 4
-    tcn = TCN(out_channels, cond_dim=3, padding=0, is_causal=True, is_cached=True)
-    log.info(tcn.calc_receptive_field())
+    tcn = TCN(1, out_channels, cond_dim=3, causal=False, cached=False, padding="valid")
+    log.info(f"Receptive field: {tcn.calc_receptive_field()}")
+    log.info(f"Delay samples: {tcn.get_delay_samples()}")
     audio = tr.rand((1, 1, 65536))
     cond = tr.rand((1, 3))
     # cond = None
     out = tcn.forward(audio, cond)
     log.info(out.shape)
+
+    script = tr.jit.script(tcn)
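
Note: with the new defaults (kernel_size=3 and automatic dilations [1, 4, 16, 64] for the four blocks above), calc_receptive_field() returns 3 + 2*4 + 2*16 + 2*64 = 171 samples. Once scripted, the exported methods support block-based streaming. A sketch of such a loop, assuming a TCN constructed with causal=True and "same" padding so each block's output length equals its input length:

    tcn.set_cached(True)  # switch to streaming mode (resets internal state)
    tcn.reset(batch_size=1)  # size the cached padding for batch size 1
    block_size = 2048
    outs = []
    for start in range(0, audio.size(-1), block_size):
        outs.append(tcn.forward(audio[..., start:start + block_size], cond))
    streamed = tr.cat(outs, dim=-1)  # matches offline output sample for sample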