From 0d6f66e3acb972ae5c3260640527bcc40148c852 Mon Sep 17 00:00:00 2001 From: MB Date: Sun, 17 Sep 2023 22:00:54 +0200 Subject: [PATCH 001/150] DEV: just to push on branch --- src/vak/models/vae_model.py | 97 +++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 src/vak/models/vae_model.py diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py new file mode 100644 index 000000000..cd48530ea --- /dev/null +++ b/src/vak/models/vae_model.py @@ -0,0 +1,97 @@ +from __future__ import annotations + +import pathlib +from typing import Callable, ClassVar, Type + +import pytorch_lightning as lightning +import torch +import torch.utils.data +from torch import nn +from operator import itemgetter + +from .registry import model_family +from . import base +from .definition import ModelDefinition + +@model_family +class VAEModel(base.Model): + definition: ClassVar[ModelDefinition] + def __init__( + self, + network: dict | None = None, + loss: torch.nn.Module | Callable | None = None, + optimizer: torch.optim.Optimizer | None = None, + metrics: dict[str:Type] | None = None, + ): + super().__init__( + network=network, loss=loss, optimizer=optimizer, metrics=metrics + ) + self.encoder = network['encode'] + self.decoder = network['decode'] + + def forward(self, x): + out, _ = self.network(x) + return out + + def encode(self, x): + return self.encoder(x) + + def decode(self, x): + return self.decoder(x) + + def configure_optimizers(self): + return self.optimizer + + def training_step(self, batch: tuple, batch_idx: int): + """ + """ + x = batch[0] + out, z, latent_dist= self.network(x) + loss = self.loss(x, z, out, latent_dist) + self.log("train_loss", loss) + return loss + + def training_step(self, batch: tuple, batch_idx: int): + """ + """ + + x = batch[0] + x = batch[0] + out, _ = self.network(x) + z, latent_dist = itemgetter('z', 'latent_dist')(_) + loss = self.loss(x, z, out, latent_dist) + self.log("train_loss", loss) + return 
loss + + def validation_step(self, batch: tuple, batch_idx: int): + x = batch["frames"] + x = batch[0] + out, _ = self.network(x) + z, latent_dist = itemgetter('z', 'latent_dist')(_) + for metric_name, metric_callable in self.metrics.items(): + if metric_name == "loss": + self.log( + f"val_{metric_name}", + metric_callable(x, z, out, latent_dist), + batch_size=1, + on_step=True, + ) + elif metric_name == "acc": + self.log( + f"val_{metric_name}", + metric_callable(out, x), + batch_size=1, + on_step=True, + ) + + @classmethod + def from_config( + cls, config: dict + ): + network, loss, optimizer, metrics = cls.attributes_from_config(config) + return cls( + network=network, + optimizer=optimizer, + loss=loss, + metrics=metrics, + ) \ No newline at end of file From 3aeb576cc6d47968617f5c5fbbb3e0dad904538d Mon Sep 17 00:00:00 2001 From: MB Date: Fri, 1 Sep 2023 19:18:19 +0200 Subject: [PATCH 002/150] DEV: Initial notebook implementation VAE + AVA. --- .gitignore | 5 +- test_vae.ipynb | 303 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 307 insertions(+), 1 deletion(-) create mode 100644 test_vae.ipynb diff --git a/.gitignore b/.gitignore index d15377a62..77e8199f0 100644 --- a/.gitignore +++ b/.gitignore @@ -26,4 +26,7 @@ tests/data_for_tests/generated/ # coverage / pytest-cov .coverage -coverage.xml \ No newline at end of file +coverage.xml + +#nox +.nox/ \ No newline at end of file diff --git a/test_vae.ipynb b/test_vae.ipynb new file mode 100644 index 000000000..c921a5b1f --- /dev/null +++ b/test_vae.ipynb @@ -0,0 +1,303 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from __future__ import annotations\n", + "\n", + "import pathlib\n", + "from typing import Callable, ClassVar, Type\n", + "\n", + "import pytorch_lightning as lightning\n", + "import torch\n", + "import torch.utils.data\n", + "from torch import nn\n", + "\n", + "# The src.vak.models prefix has to be removed 
in the actual implementation\n", + "from src.vak.models.registry import model_family\n", + "from src.vak.models import base\n", + "from src.vak.models.definition import ModelDefinition" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "# vak.nn.loss.vae\n", + "def vae_loss(\n", + " x: torch.Tensor,\n", + " z: torch.Tensor,\n", + " x_rec: torch.Tensor,\n", + " latent_dist: torch.Tensor,\n", + " model_precision: float,\n", + " z_dim: int\n", + "):\n", + "\n", + " x_dim = x.shape\n", + " elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * np.log( 2 * np.pi ))\n", + " # E_{q(z|x)} p(x|z)\n", + " pxz_term = -0.5 * x_dim * (np.log(2 * np.pi / model_precision))\n", + " l2s = torch.sum( torch.pow( x.view( x.shape[0], -1 ) - x_rec, 2), dim=1)\n", + " pxz_term = pxz_term - 0.5 * model_precision * torch.sum(l2s)\n", + " elbo = elbo + pxz_term\n", + " # H[q(z|x)]\n", + " elbo = elbo + torch.sum(latent_dist.entropy())\n", + " return elbo\n", + "\n", + "class VaeLoss(torch.nn.Module):\n", + " \"\"\"\"\"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " return_latent_rec: bool = False,\n", + " model_precision: float = 10.0,\n", + " z_dim: int = 32\n", + " ):\n", + " super().__init__()\n", + " self.return_latent_rec = return_latent_rec\n", + " self.model_precision = model_precision\n", + " self.z_dim = z_dim\n", + "\n", + " def forward(\n", + " self,\n", + " x: torch.Tensor,\n", + " z: torch.Tensor,\n", + " x_rec: torch.Tensor,\n", + " latent_dist: torch.Tensor,\n", + " ):\n", + " x_shape = x.shape\n", + " elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim)\n", + " if self.return_latent_rec:\n", + " return -elbo, z.detach().cpu().numpy(), \\\n", + " x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy()\n", + " return -elbo\n" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": 
"stderr", + "output_type": "stream", + "text": [ + "/Users/mb/Library/Mobile Documents/com~apple~CloudDocs/gits/vak/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "# vak.models.vae_model.VAEModel\n", + "@model_family\n", + "class VAEModel(base.Model):\n", + " definition: ClassVar[ModelDefinition]\n", + " def __init__(\n", + " self,\n", + " network: dict | None = None,\n", + " loss: torch.nn.Module | Callable | None = None,\n", + " optimizer: torch.optim.Optimizer | None = None,\n", + " metrics: dict[str:Type] | None = None,\n", + " ):\n", + " super().__init__(\n", + " network=network, loss=loss, optimizer=optimizer, metrics=metrics\n", + " )\n", + " self.encoder = network['encode']\n", + " self.decoder = network['decode']\n", + "\n", + " def forward(self, x):\n", + " return self.network(x)\n", + "\n", + " def encode(self, x):\n", + " return self.encoder(x)\n", + " \n", + " def decode(self, x):\n", + " return self.decoder(x)\n", + "\n", + " def configure_optimizers(self):\n", + " return self.optimizer\n", + "\n", + " @classmethod\n", + " def from_config(\n", + " cls, config: dict\n", + " ):\n", + " network, loss, optimizer, metrics = cls.attributes_from_config(config)\n", + " return cls(\n", + " network=network,\n", + " optimizer=optimizer,\n", + " loss=loss,\n", + " metrics=metrics,\n", + " )\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# nets.Ava\n", + "class Ava(nn.Module):\n", + " \"\"\"\n", + " \"\"\"\n", + " def __init__(\n", + " self,\n", + " hidden_dims: List[int] = [8, 8, 16, 16, 24, 24, 32]\n", + "\t\tfc_dims: List[int] = [1024, 256, 64, 32]\n", + "\t\tin_channels: int = 1,\n", + "\t\tin_fc: int = 8192,\n", + "\t\tx_shape = tuple = (128, 128)\n", + "\t\t\n", 
+ " ):\n", + " \"\"\"\n", + " \"\"\"\n", + " super().__init__()\n", + "\t\tself.in_fc = in_fc\n", + "\t\tself.in_channels = in_channels\n", + "\t\tself.x_shape = x_shape \n", + "\t\tself.x_dim = np.prod(x_shape)\n", + "\t\tmodules = []\n", + "\t\tfor h_dim in hidden_dims:\n", + "\t\t\tstride = 2 if h_dim == in_channels else 1\n", + " modules.append(\n", + " nn.Sequential(\n", + "\t\t\t\t\tnn.BatchNorm2d(in_channels),\n", + " nn.Conv2d(in_channels, out_channels=h_dim,\n", + " kernel_size=3, stride=stride, padding=1),\n", + " nn.ReLU())\n", + " )\n", + " in_channels = h_dim\n", + "\t\t\n", + "\t\tself.encoder = nn.Sequential(*modules)\n", + "\t\t\n", + "\t\tmodules = []\n", + "\t\tfor fc_dim in fc_dims[:-2]:\n", + " modules.append(\n", + " nn.Sequential(\n", + "\t\t\t\t\tnn.Linear(in_fc, fc_dim),\n", + " nn.ReLU())\n", + " )\n", + " in_fc = fc_dim\n", + "\t\tself.encoder_bottleneck = nn.Sequential(*modules)\n", + "\n", + "\t\tself.mu_layer = nn.Sequential(\n", + "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", + " nn.ReLU(),\n", + "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", + "\t\t\n", + "\t\tself.u_layer = nn.Sequential(\n", + "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", + " nn.ReLU(),\n", + "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", + "\t\t\n", + "\t\tself.d_layer = nn.Sequential(\n", + "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", + " nn.ReLU(),\n", + "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", + "\n", + "\t\tfc_dims.reverse()\n", + "\t\tmodules = []\n", + "\t\tfor i in range(len(fc_dims)):\n", + "\t\t\tout = self.fc_in if i == len(fc_dims) else fc_dims[i+1]\n", + " modules.append(\n", + " nn.Sequential(\n", + "\t\t\t\t\tnn.Linear(fc_dims[i], out),\n", + " nn.ReLU())\n", + " )\n", + "\t\tself.decoder_bottleneck = nn.Sequential(*modules)\n", + " \n", + "\t\thidden_dims.reverse()\n", + "\t\tmodules = []\n", + "\t\tfor i, h_dim in enumerate(hidden_dims):\n", + "\t\t\tstride = 2 if h_dim == in_channels else 1\n", + "\t\t\toutput_padding = 1 if h_dim 
== in_channels else 0\n", + " modules.append(\n", + " nn.Sequential(\n", + "\t\t\t\t\tnn.BatchNorm2d(in_channels),\n", + " nn.ConvTranspose2d(in_channels, out_channels=h_dim,\n", + " kernel_size=3, stride=stride, padding=1, output_padding=output_padding),\n", + " nn.ReLU() if i != len(hidden_dims))\n", + " )\n", + " in_channels = h_dim\n", + "\n", + "\t\tself.decoder = nn.Sequential(*modules)\n", + "\n", + "\tdef encode(self, x):\n", + "\t\t\"\"\"\n", + "\t\t\"\"\"\n", + "\t\tx = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc)\n", + "\t\tx = self.encoder_bottleneck(x)\n", + "\t\tmu = self.mu_layer(x)\n", + "\t\tu = self.u_layer(x).unsqueeze(-1)\n", + "\t\td = torch.exp(self.d_layer(x))\n", + "\t\treturn mu, u, d\n", + "\n", + "\n", + "\tdef decode(self, z):\n", + "\t\t\"\"\"\n", + "\t\t\"\"\"\n", + "\t\tz = self.decoder_bottleneck(z).view(-1,32,16,16)\n", + "\t\tz = self.decoder(z).view(-1, x_dim)\n", + "\t\treturn z\n", + "\n", + " def reparametrize(self, mu, u, d):\n", + " latent_dist = LowRankMultivariateNormal(mu, u, d)\n", + "\t\tz = latent_dist.rsample()\n", + " return z, latent_dist\n", + "\n", + "\n", + "\tdef forward(self, x, return_latent_rec=False):\n", + "\t\tmu, u, d = self.encode(x)\n", + "\t\tz, latent_dist = self.reparametrize(mu, u, d)\n", + "\t\tx_rec = self.decode(z)\n", + "\t\treturn x_rec, {'z': z, 'mu': mu, 'latent_dist': latent_dist, 'u': u, 'd': d }" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "@model(family=VAEModel)\n", + "class AvaNet: # this will be renamed to Ava in implementation, just to avoid naming conflicts.\n", + " \"\"\"\n", + " \"\"\"\n", + " network = Ava\n", + " loss = VaeLoss\n", + " optimizer = torch.optim.Adam\n", + " metrics = {\n", + " \"loss\": VaeLoss,\n", + " }\n", + " default_config = {\"optimizer\": {\"lr\": 0.003}}" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": 
"python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From 9885e6b8af33c5563aa005cfc5c1e16677d787d1 Mon Sep 17 00:00:00 2001 From: MB Date: Mon, 4 Sep 2023 14:50:03 +0200 Subject: [PATCH 003/150] DEV: Added training and validation inside model family definition. --- .vscode/settings.json | 6 + src/vak/models/__init__.py | 5 + src/vak/models/ava.py | 21 +++ src/vak/models/vae_model.py | 4 +- src/vak/nets/__init__.py | 2 + src/vak/nets/ava.py | 119 ++++++++++++++++ src/vak/nn/loss/__init__.py | 3 + src/vak/nn/loss/vae.py | 54 +++++++ test_vae.ipynb | 272 +----------------------------------- 9 files changed, 213 insertions(+), 273 deletions(-) create mode 100644 .vscode/settings.json create mode 100644 src/vak/models/ava.py create mode 100644 src/vak/nets/ava.py create mode 100644 src/vak/nn/loss/vae.py diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 000000000..9ee86e71a --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "[python]": { + "editor.defaultFormatter": "ms-python.autopep8" + }, + "python.formatting.provider": "none" +} \ No newline at end of file diff --git a/src/vak/models/__init__.py b/src/vak/models/__init__.py index 604fa408e..05db4cb5a 100644 --- a/src/vak/models/__init__.py +++ b/src/vak/models/__init__.py @@ -8,6 +8,8 @@ from .parametric_umap_model import ParametricUMAPModel from .registry import model_family from .tweetynet import TweetyNet +from .vae_model import VAEModel +from .ava import AVA __all__ = [ "base", @@ -23,4 +25,7 @@ "ParametricUMAPModel", "registry", "TweetyNet", + "VAEModel", + "AVA", + ] diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py new file mode 100644 index 000000000..b64c815c4 --- 
/dev/null +++ b/src/vak/models/ava.py @@ -0,0 +1,21 @@ +from __future__ import annotations + +import torch + +from .. import metrics, nets +from .decorator import model +from .vae_model import VAEModel +from ..nn.loss import VaeLoss + +@model(family=VAEModel) +class AVA: + """ + """ + network = Ava + loss = VaeLoss + optimizer = torch.optim.Adam + metrics = { + "loss": VaeLoss, + "kl": torch.nn.functional.kl_div + } + default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index cd48530ea..96c974c89 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -46,7 +46,8 @@ def training_step(self, batch: tuple, batch_idx: int): """ """ x = batch[0] - out, z, latent_dist= self.network(x) + out, _ = self.network(x) + z, latent_dist = itemgetter('z', 'latent_dist')(_) loss = self.loss(x, z, out, latent_dist) self.log("train_loss", loss) return loss @@ -54,7 +55,6 @@ def training_step(self, batch: tuple, batch_idx: int): def training_step(self, batch: tuple, batch_idx: int): """ """ - x = batch[0] x = batch[0] out, _ = self.network(x) diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index e31b90bff..f716f0d16 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -2,6 +2,7 @@ from .conv_encoder import ConvEncoder from .ed_tcn import ED_TCN from .tweetynet import TweetyNet +from .ava import Ava __all__ = [ "conv_encoder", @@ -10,4 +11,5 @@ "ED_TCN", "tweetynet", "TweetyNet", + "Ava" ] diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py new file mode 100644 index 000000000..7222f675c --- /dev/null +++ b/src/vak/nets/ava.py @@ -0,0 +1,119 @@ +from __future__ import annotations + +import torch +from torch import nn +from torch.distributions import LowRankMultivariateNormal + +class Ava(nn.Module): + """ + """ + def __init__( + self, + hidden_dims: list[int] = [8, 8, 16, 16, 24, 24, 32], + fc_dims: list[int] = [1024, 256, 64, 32], + 
in_channels: int = 1, + in_fc: int = 8192, + x_shape: tuple = (128, 128) + ): + """ + """ + super().__init__() + self.in_fc = in_fc + self.in_channels = in_channels + self.x_shape = x_shape + self.x_dim = torch.prod(x_shape) + modules = [] + for h_dim in hidden_dims: + stride = 2 if h_dim == in_channels else 1 + modules.append( + nn.Sequential( + nn.BatchNorm2d(in_channels), + nn.Conv2d(in_channels, out_channels=h_dim, + kernel_size=3, stride=stride, padding=1), + nn.ReLU()) + ) + in_channels = h_dim + + self.encoder = nn.Sequential(*modules) + + modules = [] + for fc_dim in fc_dims[:-2]: + modules.append( + nn.Sequential( + nn.Linear(in_fc, fc_dim), + nn.ReLU()) + ) + in_fc = fc_dim + self.encoder_bottleneck = nn.Sequential(*modules) + + self.mu_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + self.u_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + self.d_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + fc_dims.reverse() + modules = [] + for i in range(len(fc_dims)): + out = self.fc_in if i == len(fc_dims) else fc_dims[i+1] + modules.append( + nn.Sequential( + nn.Linear(fc_dims[i], out), + nn.ReLU()) + ) + self.decoder_bottleneck = nn.Sequential(*modules) + + hidden_dims.reverse() + modules = [] + for i, h_dim in enumerate(hidden_dims): + stride = 2 if h_dim == in_channels else 1 + output_padding = 1 if h_dim == in_channels else 0 + modules.append( + nn.Sequential( + nn.BatchNorm2d(in_channels), + nn.ConvTranspose2d(in_channels, out_channels=h_dim, + kernel_size=3, stride=stride, padding=1, output_padding=output_padding), + nn.ReLU() if i != len(hidden_dims)) + ) + in_channels = h_dim + + self.decoder = nn.Sequential(*modules) + + def encode(self, x): + """ + """ + x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) + x = 
self.encoder_bottleneck(x) + mu = self.mu_layer(x) + u = self.u_layer(x).unsqueeze(-1) + d = torch.exp(self.d_layer(x)) + z, latent_dist = self.reparametrize(mu, u, d) + return z, latent_dist + + + def decode(self, z): + """ + """ + z = self.decoder_bottleneck(z).view(-1,32,16,16) + z = self.decoder(z).view(-1, x_dim) + return z + + def reparametrize(self, mu, u, d): + latent_dist = LowRankMultivariateNormal(mu, u, d) + z = latent_dist.rsample() + return z, latent_dist + + + def forward(self, x, return_latent_rec=False): + z, latent_dist = self.encode(x) + x_rec = self.decode(z) + return x_rec, {'z': z, 'latent_dist': latent_dist,} \ No newline at end of file diff --git a/src/vak/nn/loss/__init__.py b/src/vak/nn/loss/__init__.py index 18f4e6d2f..5325e82db 100644 --- a/src/vak/nn/loss/__init__.py +++ b/src/vak/nn/loss/__init__.py @@ -1,9 +1,12 @@ from .dice import DiceLoss, dice_loss from .umap import UmapLoss, umap_loss +from .vae import VaeLoss, vae_loss __all__ = [ "DiceLoss", "dice_loss", "UmapLoss", "umap_loss", + "VaeLoss", + "vae_loss" ] diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py new file mode 100644 index 000000000..e6c993a89 --- /dev/null +++ b/src/vak/nn/loss/vae.py @@ -0,0 +1,54 @@ +from __future__ import annotations + +import warnings +import math +import torch +import numpy as np +# vak.nn.loss.vae +def vae_loss( + x: torch.Tensor, + z: torch.Tensor, + x_rec: torch.Tensor, + latent_dist: torch.Tensor, + model_precision: float, + z_dim: int +): + pi = torch.tensor(math.pi) + x_dim = x.shape + elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * torch.log( 2 * pi )) + # E_{q(z|x)} p(x|z) + pxz_term = -0.5 * x_dim * (torch.log(2 * pi / model_precision)) + l2s = torch.sum( torch.pow( x.view( x.shape[0], -1 ) - x_rec, 2), dim=1) + pxz_term = pxz_term - 0.5 * model_precision * torch.sum(l2s) + elbo = elbo + pxz_term + # H[q(z|x)] + elbo = elbo + torch.sum(latent_dist.entropy()) + return elbo + +class VaeLoss(torch.nn.Module): + """""" + + 
def __init__( + self, + return_latent_rec: bool = False, + model_precision: float = 10.0, + z_dim: int = 32 + ): + super().__init__() + self.return_latent_rec = return_latent_rec + self.model_precision = model_precision + self.z_dim = z_dim + + def forward( + self, + x: torch.Tensor, + z: torch.Tensor, + x_rec: torch.Tensor, + latent_dist: torch.Tensor, + ): + x_shape = x.shape + elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) + if self.return_latent_rec: + return -elbo, z.detach().cpu().numpy(), \ + x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy() + return -elbo diff --git a/test_vae.ipynb b/test_vae.ipynb index c921a5b1f..142f3a2b7 100644 --- a/test_vae.ipynb +++ b/test_vae.ipynb @@ -5,277 +5,7 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [ - "from __future__ import annotations\n", - "\n", - "import pathlib\n", - "from typing import Callable, ClassVar, Type\n", - "\n", - "import pytorch_lightning as lightning\n", - "import torch\n", - "import torch.utils.data\n", - "from torch import nn\n", - "\n", - "# The src.vak.models prefix has to be removed in the actual implementation\n", - "from src.vak.models.registry import model_family\n", - "from src.vak.models import base\n", - "from src.vak.models.definition import ModelDefinition" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [], - "source": [ - "# vak.nn.loss.vae\n", - "def vae_loss(\n", - " x: torch.Tensor,\n", - " z: torch.Tensor,\n", - " x_rec: torch.Tensor,\n", - " latent_dist: torch.Tensor,\n", - " model_precision: float,\n", - " z_dim: int\n", - "):\n", - "\n", - " x_dim = x.shape\n", - " elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * np.log( 2 * np.pi ))\n", - " # E_{q(z|x)} p(x|z)\n", - " pxz_term = -0.5 * x_dim * (np.log(2 * np.pi / model_precision))\n", - " l2s = torch.sum( torch.pow( x.view( x.shape[0], -1 ) - x_rec, 2), dim=1)\n", - " 
pxz_term = pxz_term - 0.5 * model_precision * torch.sum(l2s)\n", - " elbo = elbo + pxz_term\n", - " # H[q(z|x)]\n", - " elbo = elbo + torch.sum(latent_dist.entropy())\n", - " return elbo\n", - "\n", - "class VaeLoss(torch.nn.Module):\n", - " \"\"\"\"\"\"\n", - "\n", - " def __init__(\n", - " self,\n", - " return_latent_rec: bool = False,\n", - " model_precision: float = 10.0,\n", - " z_dim: int = 32\n", - " ):\n", - " super().__init__()\n", - " self.return_latent_rec = return_latent_rec\n", - " self.model_precision = model_precision\n", - " self.z_dim = z_dim\n", - "\n", - " def forward(\n", - " self,\n", - " x: torch.Tensor,\n", - " z: torch.Tensor,\n", - " x_rec: torch.Tensor,\n", - " latent_dist: torch.Tensor,\n", - " ):\n", - " x_shape = x.shape\n", - " elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim)\n", - " if self.return_latent_rec:\n", - " return -elbo, z.detach().cpu().numpy(), \\\n", - " x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy()\n", - " return -elbo\n" - ] - }, - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mb/Library/Mobile Documents/com~apple~CloudDocs/gits/vak/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "# vak.models.vae_model.VAEModel\n", - "@model_family\n", - "class VAEModel(base.Model):\n", - " definition: ClassVar[ModelDefinition]\n", - " def __init__(\n", - " self,\n", - " network: dict | None = None,\n", - " loss: torch.nn.Module | Callable | None = None,\n", - " optimizer: torch.optim.Optimizer | None = None,\n", - " metrics: dict[str:Type] | None = None,\n", - " ):\n", - " super().__init__(\n", - " network=network, loss=loss, optimizer=optimizer, metrics=metrics\n", - " )\n", - " self.encoder = network['encode']\n", - " self.decoder = network['decode']\n", - "\n", - " def forward(self, x):\n", - " return self.network(x)\n", - "\n", - " def encode(self, x):\n", - " return self.encoder(x)\n", - " \n", - " def decode(self, x):\n", - " return self.decoder(x)\n", - "\n", - " def configure_optimizers(self):\n", - " return self.optimizer\n", - "\n", - " @classmethod\n", - " def from_config(\n", - " cls, config: dict\n", - " ):\n", - " network, loss, optimizer, metrics = cls.attributes_from_config(config)\n", - " return cls(\n", - " network=network,\n", - " optimizer=optimizer,\n", - " loss=loss,\n", - " metrics=metrics,\n", - " )\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "# nets.Ava\n", - "class Ava(nn.Module):\n", - " \"\"\"\n", - " \"\"\"\n", - " def __init__(\n", - " self,\n", - " hidden_dims: List[int] = [8, 8, 16, 16, 24, 24, 32]\n", - "\t\tfc_dims: List[int] = [1024, 256, 64, 32]\n", - "\t\tin_channels: int = 1,\n", - "\t\tin_fc: int = 8192,\n", - "\t\tx_shape = tuple = (128, 128)\n", - "\t\t\n", - " ):\n", - " \"\"\"\n", - " \"\"\"\n", - " super().__init__()\n", - "\t\tself.in_fc = in_fc\n", - "\t\tself.in_channels = in_channels\n", - "\t\tself.x_shape = x_shape \n", - "\t\tself.x_dim = np.prod(x_shape)\n", - "\t\tmodules = []\n", 
- "\t\tfor h_dim in hidden_dims:\n", - "\t\t\tstride = 2 if h_dim == in_channels else 1\n", - " modules.append(\n", - " nn.Sequential(\n", - "\t\t\t\t\tnn.BatchNorm2d(in_channels),\n", - " nn.Conv2d(in_channels, out_channels=h_dim,\n", - " kernel_size=3, stride=stride, padding=1),\n", - " nn.ReLU())\n", - " )\n", - " in_channels = h_dim\n", - "\t\t\n", - "\t\tself.encoder = nn.Sequential(*modules)\n", - "\t\t\n", - "\t\tmodules = []\n", - "\t\tfor fc_dim in fc_dims[:-2]:\n", - " modules.append(\n", - " nn.Sequential(\n", - "\t\t\t\t\tnn.Linear(in_fc, fc_dim),\n", - " nn.ReLU())\n", - " )\n", - " in_fc = fc_dim\n", - "\t\tself.encoder_bottleneck = nn.Sequential(*modules)\n", - "\n", - "\t\tself.mu_layer = nn.Sequential(\n", - "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", - " nn.ReLU(),\n", - "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", - "\t\t\n", - "\t\tself.u_layer = nn.Sequential(\n", - "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", - " nn.ReLU(),\n", - "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", - "\t\t\n", - "\t\tself.d_layer = nn.Sequential(\n", - "\t\t\tnn.Linear(fc_dims[-3], fc_dims[-2]),\n", - " nn.ReLU(),\n", - "\t\t\tnn.Linear(fc_dims[-2], fc_dims[-1]))\n", - "\n", - "\t\tfc_dims.reverse()\n", - "\t\tmodules = []\n", - "\t\tfor i in range(len(fc_dims)):\n", - "\t\t\tout = self.fc_in if i == len(fc_dims) else fc_dims[i+1]\n", - " modules.append(\n", - " nn.Sequential(\n", - "\t\t\t\t\tnn.Linear(fc_dims[i], out),\n", - " nn.ReLU())\n", - " )\n", - "\t\tself.decoder_bottleneck = nn.Sequential(*modules)\n", - " \n", - "\t\thidden_dims.reverse()\n", - "\t\tmodules = []\n", - "\t\tfor i, h_dim in enumerate(hidden_dims):\n", - "\t\t\tstride = 2 if h_dim == in_channels else 1\n", - "\t\t\toutput_padding = 1 if h_dim == in_channels else 0\n", - " modules.append(\n", - " nn.Sequential(\n", - "\t\t\t\t\tnn.BatchNorm2d(in_channels),\n", - " nn.ConvTranspose2d(in_channels, out_channels=h_dim,\n", - " kernel_size=3, stride=stride, padding=1, 
output_padding=output_padding),\n", - " nn.ReLU() if i != len(hidden_dims))\n", - " )\n", - " in_channels = h_dim\n", - "\n", - "\t\tself.decoder = nn.Sequential(*modules)\n", - "\n", - "\tdef encode(self, x):\n", - "\t\t\"\"\"\n", - "\t\t\"\"\"\n", - "\t\tx = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc)\n", - "\t\tx = self.encoder_bottleneck(x)\n", - "\t\tmu = self.mu_layer(x)\n", - "\t\tu = self.u_layer(x).unsqueeze(-1)\n", - "\t\td = torch.exp(self.d_layer(x))\n", - "\t\treturn mu, u, d\n", - "\n", - "\n", - "\tdef decode(self, z):\n", - "\t\t\"\"\"\n", - "\t\t\"\"\"\n", - "\t\tz = self.decoder_bottleneck(z).view(-1,32,16,16)\n", - "\t\tz = self.decoder(z).view(-1, x_dim)\n", - "\t\treturn z\n", - "\n", - " def reparametrize(self, mu, u, d):\n", - " latent_dist = LowRankMultivariateNormal(mu, u, d)\n", - "\t\tz = latent_dist.rsample()\n", - " return z, latent_dist\n", - "\n", - "\n", - "\tdef forward(self, x, return_latent_rec=False):\n", - "\t\tmu, u, d = self.encode(x)\n", - "\t\tz, latent_dist = self.reparametrize(mu, u, d)\n", - "\t\tx_rec = self.decode(z)\n", - "\t\treturn x_rec, {'z': z, 'mu': mu, 'latent_dist': latent_dist, 'u': u, 'd': d }" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "@model(family=VAEModel)\n", - "class AvaNet: # this will be renamed to Ava in implementation, just to avoid naming conflicts.\n", - " \"\"\"\n", - " \"\"\"\n", - " network = Ava\n", - " loss = VaeLoss\n", - " optimizer = torch.optim.Adam\n", - " metrics = {\n", - " \"loss\": VaeLoss,\n", - " }\n", - " default_config = {\"optimizer\": {\"lr\": 0.003}}" - ] + "source": [] } ], "metadata": { From 4de1b0979686ac1dff5c5ddefc4aebcbe600c41c Mon Sep 17 00:00:00 2001 From: MB Date: Sun, 17 Sep 2023 21:58:58 +0200 Subject: [PATCH 004/150] dev: Added suggestions and tested network forward result. 
--- src/vak/models/ava.py | 8 +- src/vak/models/vae_model.py | 3 +- src/vak/nets/__init__.py | 2 +- src/vak/nets/ava.py | 163 +++++++++++++++++++----------------- src/vak/nn/loss/__init__.py | 6 +- src/vak/nn/loss/vae.py | 9 +- test_vae.ipynb | 20 ++++- 7 files changed, 116 insertions(+), 95 deletions(-) diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index b64c815c4..b4206003a 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -5,17 +5,17 @@ from .. import metrics, nets from .decorator import model from .vae_model import VAEModel -from ..nn.loss import VaeLoss +from ..nn.loss import VaeElboLoss @model(family=VAEModel) class AVA: """ """ - network = Ava - loss = VaeLoss + network = nets.Ava + loss = VaeElboLoss optimizer = torch.optim.Adam metrics = { - "loss": VaeLoss, + "loss": VaeElboLoss, "kl": torch.nn.functional.kl_div } default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index 96c974c89..edc80f2bd 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -46,8 +46,7 @@ def training_step(self, batch: tuple, batch_idx: int): """ """ x = batch[0] - out, _ = self.network(x) - z, latent_dist = itemgetter('z', 'latent_dist')(_) + out, z, latent_dist= self.network(x) loss = self.loss(x, z, out, latent_dist) self.log("train_loss", loss) return loss diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index f716f0d16..dfd3f5db0 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -11,5 +11,5 @@ "ED_TCN", "tweetynet", "TweetyNet", - "Ava" + "Ava", ] diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 7222f675c..18bee6702 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,119 +1,124 @@ -from __future__ import annotations +# from __future__ import annotations import torch from torch import nn from torch.distributions import LowRankMultivariateNormal +from typing import Tuple + +# 
Is it necessary to put this in src.vak.nn.modules? +class BottleneckLayer(nn.Module): + def __init__(self, dims): + super().__init__() + self.layer = nn.Sequential( + nn.Linear(dims[0], dims[1]), + nn.ReLU(), + nn.Linear(dims[1], dims[2])) + + def forward(self, x): + return self.layer(x) class Ava(nn.Module): """ """ def __init__( self, - hidden_dims: list[int] = [8, 8, 16, 16, 24, 24, 32], - fc_dims: list[int] = [1024, 256, 64, 32], - in_channels: int = 1, - in_fc: int = 8192, - x_shape: tuple = (128, 128) + hidden_dims: Tuple[int] = (8, 8, 16, 16, 24, 24), + fc_dims: Tuple[int] = (1024, 256, 64), + z_dim: int = 32, + in_channels: int = 1, + x_shape: Tuple[int] = (128, 128) ): """ """ super().__init__() - self.in_fc = in_fc - self.in_channels = in_channels - self.x_shape = x_shape - self.x_dim = torch.prod(x_shape) - modules = [] - for h_dim in hidden_dims: - stride = 2 if h_dim == in_channels else 1 + fc_dims = (*fc_dims, z_dim) + hidden_dims = (*hidden_dims, z_dim) + + self.in_channels = in_channels + self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) + self.x_shape = torch.tensor(x_shape) + self.x_dim = torch.prod(self.x_shape) + self.in_fc = int(self.x_dim / 2) + in_fc = self.in_fc + modules = [] + for h_dim in hidden_dims: + stride = 2 if h_dim == in_channels else 1 modules.append( nn.Sequential( - nn.BatchNorm2d(in_channels), + nn.BatchNorm2d(in_channels), nn.Conv2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1), nn.ReLU()) ) in_channels = h_dim - - self.encoder = nn.Sequential(*modules) - - modules = [] - for fc_dim in fc_dims[:-2]: + + self.encoder = nn.Sequential(*modules) + + modules = [] + for fc_dim in fc_dims[:-2]: modules.append( nn.Sequential( - nn.Linear(in_fc, fc_dim), + nn.Linear(in_fc, fc_dim), nn.ReLU()) ) in_fc = fc_dim - self.encoder_bottleneck = nn.Sequential(*modules) - - self.mu_layer = nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - 
- self.u_layer = nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - - self.d_layer = nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - - fc_dims.reverse() - modules = [] - for i in range(len(fc_dims)): - out = self.fc_in if i == len(fc_dims) else fc_dims[i+1] + self.encoder_bottleneck = nn.Sequential(*modules) + self.mu_layer = BottleneckLayer(fc_dims[-3:]) + self.cov_factor_layer = BottleneckLayer(fc_dims[-3:]) + self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) + fc_dims = fc_dims[::-1] + modules = [] + for i in range(len(fc_dims)): + out = self.in_fc if i == len(fc_dims) - 1 else fc_dims[i+1] modules.append( nn.Sequential( - nn.Linear(fc_dims[i], out), + nn.Linear(fc_dims[i], out), nn.ReLU()) ) - self.decoder_bottleneck = nn.Sequential(*modules) - - hidden_dims.reverse() - modules = [] - for i, h_dim in enumerate(hidden_dims): - stride = 2 if h_dim == in_channels else 1 - output_padding = 1 if h_dim == in_channels else 0 - modules.append( - nn.Sequential( - nn.BatchNorm2d(in_channels), - nn.ConvTranspose2d(in_channels, out_channels=h_dim, - kernel_size=3, stride=stride, padding=1, output_padding=output_padding), - nn.ReLU() if i != len(hidden_dims)) - ) + self.decoder_bottleneck = nn.Sequential(*modules) + hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) + hidden_dims + modules = [] + for i, h_dim in enumerate(hidden_dims): + stride = 2 if h_dim == in_channels else 1 + output_padding = 1 if h_dim == in_channels else 0 + layers = [ nn.BatchNorm2d(in_channels), + nn.ConvTranspose2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1, output_padding=output_padding)] + if i != len(hidden_dims) - 1: + layers.append(nn.ReLU()) + + modules.append( nn.Sequential(*layers) ) in_channels = h_dim - self.decoder = nn.Sequential(*modules) + self.decoder = nn.Sequential(*modules) def encode(self, x): - """ - """ - x = 
self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) - x = self.encoder_bottleneck(x) - mu = self.mu_layer(x) - u = self.u_layer(x).unsqueeze(-1) - d = torch.exp(self.d_layer(x)) - z, latent_dist = self.reparametrize(mu, u, d) - return z, latent_dist + """ + """ + x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) + x = self.encoder_bottleneck(x) + mu = self.mu_layer(x) + cov_factor = self.cov_factor_layer(x).unsqueeze(-1) + cov_diag = torch.exp(self.cov_diag_layer(x)) + z, latent_dist = self.reparametrize(mu, cov_factor, cov_diag) + return z, latent_dist def decode(self, z): - """ - """ - z = self.decoder_bottleneck(z).view(-1,32,16,16) - z = self.decoder(z).view(-1, x_dim) - return z - - def reparametrize(self, mu, u, d): - latent_dist = LowRankMultivariateNormal(mu, u, d) - z = latent_dist.rsample() + """ + """ + z = self.decoder_bottleneck(z).view(-1, self.fc_view[0], self.fc_view[1], self.fc_view[2]) + z = self.decoder(z).view(-1, self.x_dim) + return z + + @staticmethod + def reparametrize(mu, cov_factor, cov_diag): + latent_dist = LowRankMultivariateNormal(mu, cov_factor, cov_diag) + z = latent_dist.rsample() return z, latent_dist - def forward(self, x, return_latent_rec=False): - z, latent_dist = self.encode(x) - x_rec = self.decode(z) - return x_rec, {'z': z, 'latent_dist': latent_dist,} \ No newline at end of file + def forward(self, x): + z, latent_dist = self.encode(x) + x_rec = self.decode(z).view(-1, self.x_shape[0], self.x_shape[1]) + return x_rec, z, latent_dist \ No newline at end of file diff --git a/src/vak/nn/loss/__init__.py b/src/vak/nn/loss/__init__.py index 5325e82db..73230313a 100644 --- a/src/vak/nn/loss/__init__.py +++ b/src/vak/nn/loss/__init__.py @@ -1,12 +1,12 @@ from .dice import DiceLoss, dice_loss from .umap import UmapLoss, umap_loss -from .vae import VaeLoss, vae_loss +from .vae import VaeElboLoss, vae_elbo_loss __all__ = [ "DiceLoss", "dice_loss", "UmapLoss", "umap_loss", - "VaeLoss", - "vae_loss" + 
"VaeElboLoss", + "vae_elbo_loss" ] diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index e6c993a89..af533aa79 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -4,8 +4,9 @@ import math import torch import numpy as np -# vak.nn.loss.vae -def vae_loss( + + +def vae_elbo_loss( x: torch.Tensor, z: torch.Tensor, x_rec: torch.Tensor, @@ -25,7 +26,7 @@ def vae_loss( elbo = elbo + torch.sum(latent_dist.entropy()) return elbo -class VaeLoss(torch.nn.Module): +class VaeElboLoss(torch.nn.Module): """""" def __init__( @@ -47,7 +48,7 @@ def forward( latent_dist: torch.Tensor, ): x_shape = x.shape - elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) + elbo = vae_elbo_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) if self.return_latent_rec: return -elbo, z.detach().cpu().numpy(), \ x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy() diff --git a/test_vae.ipynb b/test_vae.ipynb index 142f3a2b7..9084cb9d8 100644 --- a/test_vae.ipynb +++ b/test_vae.ipynb @@ -2,10 +2,26 @@ "cells": [ { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "from src.vak.nets.ava import Ava\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "x_shape = (3, 128, 512)\n", + "input = torch.zeros(x_shape)\n", + "net = Ava(x_shape=(x_shape[1], x_shape[2]))\n", + "output, _ = net.forward(input)\n", + "assert output.shape == x_shape, 'Error'" + ] } ], "metadata": { From d39f14a0ea937b6e9da7a94ef8a5d4f5e4d2614e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:14:49 -0400 Subject: [PATCH 005/150] Add src/vak/nets/ava.py --- src/vak/nets/ava.py | 163 ++++++++++++++++++++++---------------------- 1 file changed, 80 insertions(+), 83 deletions(-) diff --git 
a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 18bee6702..bcd804c15 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,124 +1,121 @@ -# from __future__ import annotations +from __future__ import annotations import torch from torch import nn from torch.distributions import LowRankMultivariateNormal -from typing import Tuple -# Is it necessary to put this in src.vak.nn.modules? -class BottleneckLayer(nn.Module): - def __init__(self, dims): - super().__init__() - self.layer = nn.Sequential( - nn.Linear(dims[0], dims[1]), - nn.ReLU(), - nn.Linear(dims[1], dims[2])) - - def forward(self, x): - return self.layer(x) class Ava(nn.Module): """ """ def __init__( self, - hidden_dims: Tuple[int] = (8, 8, 16, 16, 24, 24), - fc_dims: Tuple[int] = (1024, 256, 64), - z_dim: int = 32, - in_channels: int = 1, - x_shape: Tuple[int] = (128, 128) + hidden_dims: list[int] = [8, 8, 16, 16, 24, 24, 32], + fc_dims: list[int] = [1024, 256, 64, 32], + in_channels: int = 1, + in_fc: int = 8192, + x_shape: tuple = (128, 128) ): """ """ super().__init__() - fc_dims = (*fc_dims, z_dim) - hidden_dims = (*hidden_dims, z_dim) - - self.in_channels = in_channels - self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) - self.x_shape = torch.tensor(x_shape) - self.x_dim = torch.prod(self.x_shape) - self.in_fc = int(self.x_dim / 2) - in_fc = self.in_fc - modules = [] - for h_dim in hidden_dims: - stride = 2 if h_dim == in_channels else 1 + self.in_fc = in_fc + self.in_channels = in_channels + self.x_shape = x_shape + self.x_dim = torch.prod(x_shape) + modules = [] + for h_dim in hidden_dims: + stride = 2 if h_dim == in_channels else 1 modules.append( nn.Sequential( - nn.BatchNorm2d(in_channels), + nn.BatchNorm2d(in_channels), nn.Conv2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1), nn.ReLU()) ) in_channels = h_dim - - self.encoder = nn.Sequential(*modules) - - modules = [] - for fc_dim in fc_dims[:-2]: + + self.encoder = 
nn.Sequential(*modules) + + modules = [] + for fc_dim in fc_dims[:-2]: modules.append( nn.Sequential( - nn.Linear(in_fc, fc_dim), + nn.Linear(in_fc, fc_dim), nn.ReLU()) ) in_fc = fc_dim - self.encoder_bottleneck = nn.Sequential(*modules) - self.mu_layer = BottleneckLayer(fc_dims[-3:]) - self.cov_factor_layer = BottleneckLayer(fc_dims[-3:]) - self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) - fc_dims = fc_dims[::-1] - modules = [] - for i in range(len(fc_dims)): - out = self.in_fc if i == len(fc_dims) - 1 else fc_dims[i+1] + self.encoder_bottleneck = nn.Sequential(*modules) + + self.mu_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + self.u_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + self.d_layer = nn.Sequential( + nn.Linear(fc_dims[-3], fc_dims[-2]), + nn.ReLU(), + nn.Linear(fc_dims[-2], fc_dims[-1])) + + fc_dims.reverse() + modules = [] + for i in range(len(fc_dims)): + out = self.fc_in if i == len(fc_dims) else fc_dims[i+1] modules.append( nn.Sequential( - nn.Linear(fc_dims[i], out), + nn.Linear(fc_dims[i], out), nn.ReLU()) ) - self.decoder_bottleneck = nn.Sequential(*modules) - hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) - hidden_dims - modules = [] - for i, h_dim in enumerate(hidden_dims): - stride = 2 if h_dim == in_channels else 1 - output_padding = 1 if h_dim == in_channels else 0 - layers = [ nn.BatchNorm2d(in_channels), - nn.ConvTranspose2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1, output_padding=output_padding)] - if i != len(hidden_dims) - 1: - layers.append(nn.ReLU()) - - modules.append( nn.Sequential(*layers) ) + self.decoder_bottleneck = nn.Sequential(*modules) + + hidden_dims.reverse() + modules = [] + for i, h_dim in enumerate(hidden_dims): + stride = 2 if h_dim == in_channels else 1 + output_padding = 1 if h_dim == in_channels else 0 + modules.append( + 
nn.Sequential( + nn.BatchNorm2d(in_channels), + nn.ConvTranspose2d(in_channels, out_channels=h_dim, + kernel_size=3, stride=stride, padding=1, output_padding=output_padding), + nn.ReLU() if i != len(hidden_dims)) + ) in_channels = h_dim - self.decoder = nn.Sequential(*modules) + self.decoder = nn.Sequential(*modules) def encode(self, x): - """ - """ - x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) - x = self.encoder_bottleneck(x) - mu = self.mu_layer(x) - cov_factor = self.cov_factor_layer(x).unsqueeze(-1) - cov_diag = torch.exp(self.cov_diag_layer(x)) - z, latent_dist = self.reparametrize(mu, cov_factor, cov_diag) - return z, latent_dist + """ + """ + x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) + x = self.encoder_bottleneck(x) + mu = self.mu_layer(x) + u = self.u_layer(x).unsqueeze(-1) + d = torch.exp(self.d_layer(x)) + z, latent_dist = self.reparametrize(mu, u, d) + return z, latent_dist def decode(self, z): - """ - """ - z = self.decoder_bottleneck(z).view(-1, self.fc_view[0], self.fc_view[1], self.fc_view[2]) - z = self.decoder(z).view(-1, self.x_dim) - return z - - @staticmethod - def reparametrize(mu, cov_factor, cov_diag): - latent_dist = LowRankMultivariateNormal(mu, cov_factor, cov_diag) - z = latent_dist.rsample() + """ + """ + z = self.decoder_bottleneck(z).view(-1,32,16,16) + z = self.decoder(z).view(-1, x_dim) + return z + + def reparametrize(self, mu, u, d): + latent_dist = LowRankMultivariateNormal(mu, u, d) + z = latent_dist.rsample() return z, latent_dist - def forward(self, x): - z, latent_dist = self.encode(x) - x_rec = self.decode(z).view(-1, self.x_shape[0], self.x_shape[1]) - return x_rec, z, latent_dist \ No newline at end of file + def forward(self, x, return_latent_rec=False): + z, latent_dist = self.encode(x) + x_rec = self.decode(z) + return x_rec, {'z': z, 'latent_dist': latent_dist,} + From aee76bf6cad38c6008bd7e5e8bfbd9626b924be9 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 
Sep 2023 08:14:57 -0400 Subject: [PATCH 006/150] Add src/vak/nn/loss/vae.py --- src/vak/nn/loss/vae.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index af533aa79..e6c993a89 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -4,9 +4,8 @@ import math import torch import numpy as np - - -def vae_elbo_loss( +# vak.nn.loss.vae +def vae_loss( x: torch.Tensor, z: torch.Tensor, x_rec: torch.Tensor, @@ -26,7 +25,7 @@ def vae_elbo_loss( elbo = elbo + torch.sum(latent_dist.entropy()) return elbo -class VaeElboLoss(torch.nn.Module): +class VaeLoss(torch.nn.Module): """""" def __init__( @@ -48,7 +47,7 @@ def forward( latent_dist: torch.Tensor, ): x_shape = x.shape - elbo = vae_elbo_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) + elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) if self.return_latent_rec: return -elbo, z.detach().cpu().numpy(), \ x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy() From f132cb300891723b0e255e94343bb299717f8cdd Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:15:20 -0400 Subject: [PATCH 007/150] Import vae_loss and VaeLoss in src/vak/nn/loss/__init__.py --- src/vak/nn/loss/__init__.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vak/nn/loss/__init__.py b/src/vak/nn/loss/__init__.py index 73230313a..962338f7c 100644 --- a/src/vak/nn/loss/__init__.py +++ b/src/vak/nn/loss/__init__.py @@ -1,12 +1,13 @@ from .dice import DiceLoss, dice_loss from .umap import UmapLoss, umap_loss -from .vae import VaeElboLoss, vae_elbo_loss +from .vae import VaeLoss, vae_loss + __all__ = [ "DiceLoss", "dice_loss", "UmapLoss", "umap_loss", - "VaeElboLoss", - "vae_elbo_loss" + "VaeLoss", + "vae_loss" ] From f62ac369ad6cc9628e661caabfb97eeda8252138 Mon Sep 17 00:00:00 2001 From: 
David Nicholson Date: Fri, 29 Sep 2023 08:15:32 -0400 Subject: [PATCH 008/150] Import Ava in src/vak/nets/__init__.py --- src/vak/nets/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index dfd3f5db0..f716f0d16 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -11,5 +11,5 @@ "ED_TCN", "tweetynet", "TweetyNet", - "Ava", + "Ava" ] From 7bb5f4206a836afa8f879bcb8cc8b46c57b031df Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:16:06 -0400 Subject: [PATCH 009/150] Use itemgetter in src/vak/models/vae_model.py --- src/vak/models/vae_model.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index edc80f2bd..96c974c89 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -46,7 +46,8 @@ def training_step(self, batch: tuple, batch_idx: int): """ """ x = batch[0] - out, z, latent_dist= self.network(x) + out, _ = self.network(x) + z, latent_dist = itemgetter('z', 'latent_dist')(_) loss = self.loss(x, z, out, latent_dist) self.log("train_loss", loss) return loss From e1222c3cd645a565a11ee79b6af4a9865bf92d57 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:16:20 -0400 Subject: [PATCH 010/150] Add src/vak/models/ava.py --- src/vak/models/ava.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index b4206003a..b64c815c4 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -5,17 +5,17 @@ from .. 
import metrics, nets from .decorator import model from .vae_model import VAEModel -from ..nn.loss import VaeElboLoss +from ..nn.loss import VaeLoss @model(family=VAEModel) class AVA: """ """ - network = nets.Ava - loss = VaeElboLoss + network = Ava + loss = VaeLoss optimizer = torch.optim.Adam metrics = { - "loss": VaeElboLoss, + "loss": VaeLoss, "kl": torch.nn.functional.kl_div } default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file From 1828ae7d3b77641f2cb2a9e6d592c92dd6a14f3c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:28:51 -0400 Subject: [PATCH 011/150] Remove use of itemgetter in src/vak/models/vae_model.py --- src/vak/models/vae_model.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index 96c974c89..edc80f2bd 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -46,8 +46,7 @@ def training_step(self, batch: tuple, batch_idx: int): """ """ x = batch[0] - out, _ = self.network(x) - z, latent_dist = itemgetter('z', 'latent_dist')(_) + out, z, latent_dist= self.network(x) loss = self.loss(x, z, out, latent_dist) self.log("train_loss", loss) return loss From 87668620058f95c52cd39afad03b4a2c71a8c70b Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:29:38 -0400 Subject: [PATCH 012/150] Make changes to src/vak/nets/ava.py suggested during meeting --- src/vak/nets/ava.py | 163 ++++++++++++++++++++++---------------------- 1 file changed, 83 insertions(+), 80 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index bcd804c15..8c1f59178 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,8 +1,21 @@ -from __future__ import annotations +# from __future__ import annotations import torch from torch import nn from torch.distributions import LowRankMultivariateNormal +from typing import Tuple + +# Is it necessary to put this in src.vak.nn.modules? 
+class BottleneckLayer(nn.Module): + def __init__(self, dims): + super().__init__() + self.layer = nn.Sequential( + nn.Linear(dims[0], dims[1]), + nn.ReLU(), + nn.Linear(dims[1], dims[2])) + + def forward(self, x): + return self.layer(x) class Ava(nn.Module): @@ -10,112 +23,102 @@ class Ava(nn.Module): """ def __init__( self, - hidden_dims: list[int] = [8, 8, 16, 16, 24, 24, 32], - fc_dims: list[int] = [1024, 256, 64, 32], - in_channels: int = 1, - in_fc: int = 8192, - x_shape: tuple = (128, 128) + hidden_dims: Tuple[int] = (8, 8, 16, 16, 24, 24), + fc_dims: Tuple[int] = (1024, 256, 64), + z_dim: int = 32, + in_channels: int = 1, + x_shape: Tuple[int] = (128, 128) ): """ """ super().__init__() - self.in_fc = in_fc - self.in_channels = in_channels - self.x_shape = x_shape - self.x_dim = torch.prod(x_shape) - modules = [] - for h_dim in hidden_dims: - stride = 2 if h_dim == in_channels else 1 + fc_dims = (*fc_dims, z_dim) + hidden_dims = (*hidden_dims, z_dim) + + self.in_channels = in_channels + self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) + self.x_shape = torch.tensor(x_shape) + self.x_dim = torch.prod(self.x_shape) + self.in_fc = int(self.x_dim / 2) + in_fc = self.in_fc + modules = [] + for h_dim in hidden_dims: + stride = 2 if h_dim == in_channels else 1 modules.append( nn.Sequential( - nn.BatchNorm2d(in_channels), + nn.BatchNorm2d(in_channels), nn.Conv2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1), nn.ReLU()) ) in_channels = h_dim - self.encoder = nn.Sequential(*modules) - - modules = [] - for fc_dim in fc_dims[:-2]: + self.encoder = nn.Sequential(*modules) + + modules = [] + for fc_dim in fc_dims[:-2]: modules.append( nn.Sequential( - nn.Linear(in_fc, fc_dim), + nn.Linear(in_fc, fc_dim), nn.ReLU()) ) in_fc = fc_dim - self.encoder_bottleneck = nn.Sequential(*modules) - - self.mu_layer = nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - - self.u_layer 
= nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - - self.d_layer = nn.Sequential( - nn.Linear(fc_dims[-3], fc_dims[-2]), - nn.ReLU(), - nn.Linear(fc_dims[-2], fc_dims[-1])) - - fc_dims.reverse() - modules = [] - for i in range(len(fc_dims)): - out = self.fc_in if i == len(fc_dims) else fc_dims[i+1] + self.encoder_bottleneck = nn.Sequential(*modules) + self.mu_layer = BottleneckLayer(fc_dims[-3:]) + self.cov_factor_layer = BottleneckLayer(fc_dims[-3:]) + self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) + fc_dims = fc_dims[::-1] + modules = [] + for i in range(len(fc_dims)): + out = self.in_fc if i == len(fc_dims) - 1 else fc_dims[i+1] modules.append( nn.Sequential( - nn.Linear(fc_dims[i], out), + nn.Linear(fc_dims[i], out), nn.ReLU()) ) - self.decoder_bottleneck = nn.Sequential(*modules) - - hidden_dims.reverse() - modules = [] - for i, h_dim in enumerate(hidden_dims): - stride = 2 if h_dim == in_channels else 1 - output_padding = 1 if h_dim == in_channels else 0 - modules.append( - nn.Sequential( - nn.BatchNorm2d(in_channels), - nn.ConvTranspose2d(in_channels, out_channels=h_dim, - kernel_size=3, stride=stride, padding=1, output_padding=output_padding), - nn.ReLU() if i != len(hidden_dims)) - ) + self.decoder_bottleneck = nn.Sequential(*modules) + hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) + hidden_dims + modules = [] + for i, h_dim in enumerate(hidden_dims): + stride = 2 if h_dim == in_channels else 1 + output_padding = 1 if h_dim == in_channels else 0 + layers = [ nn.BatchNorm2d(in_channels), + nn.ConvTranspose2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1, output_padding=output_padding)] + if i != len(hidden_dims) - 1: + layers.append(nn.ReLU()) + + modules.append( nn.Sequential(*layers) ) in_channels = h_dim - self.decoder = nn.Sequential(*modules) + self.decoder = nn.Sequential(*modules) def encode(self, x): - """ - """ - x = 
self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) - x = self.encoder_bottleneck(x) - mu = self.mu_layer(x) - u = self.u_layer(x).unsqueeze(-1) - d = torch.exp(self.d_layer(x)) - z, latent_dist = self.reparametrize(mu, u, d) - return z, latent_dist - + """ + """ + x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) + x = self.encoder_bottleneck(x) + mu = self.mu_layer(x) + cov_factor = self.cov_factor_layer(x).unsqueeze(-1) + cov_diag = torch.exp(self.cov_diag_layer(x)) + z, latent_dist = self.reparametrize(mu, cov_factor, cov_diag) + return z, latent_dist def decode(self, z): - """ - """ - z = self.decoder_bottleneck(z).view(-1,32,16,16) - z = self.decoder(z).view(-1, x_dim) - return z - - def reparametrize(self, mu, u, d): - latent_dist = LowRankMultivariateNormal(mu, u, d) - z = latent_dist.rsample() + """ + """ + z = self.decoder_bottleneck(z).view(-1, self.fc_view[0], self.fc_view[1], self.fc_view[2]) + z = self.decoder(z).view(-1, self.x_dim) + return z + + @staticmethod + def reparametrize(mu, cov_factor, cov_diag): + latent_dist = LowRankMultivariateNormal(mu, cov_factor, cov_diag) + z = latent_dist.rsample() return z, latent_dist - - def forward(self, x, return_latent_rec=False): - z, latent_dist = self.encode(x) - x_rec = self.decode(z) - return x_rec, {'z': z, 'latent_dist': latent_dist,} + def forward(self, x): + z, latent_dist = self.encode(x) + x_rec = self.decode(z).view(-1, self.x_shape[0], self.x_shape[1]) + return x_rec, z, latent_dist From dd9949dd48572769182aa76fc94aba9d45a1c836 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:30:07 -0400 Subject: [PATCH 013/150] Add comma at end of __all__ in src/vak/nets/__init__.py --- src/vak/nets/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index f716f0d16..dfd3f5db0 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -11,5 +11,5 @@ "ED_TCN", 
"tweetynet", "TweetyNet", - "Ava" + "Ava", ] From 96359bbe18b90b656887969d516cbabee2439033 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:30:44 -0400 Subject: [PATCH 014/150] Rename vae_loss, VaeLoss -> vae_elbo_loss, VaeElboLoss --- src/vak/nn/loss/__init__.py | 6 +++--- src/vak/nn/loss/vae.py | 9 +++++---- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/src/vak/nn/loss/__init__.py b/src/vak/nn/loss/__init__.py index 962338f7c..5435ae048 100644 --- a/src/vak/nn/loss/__init__.py +++ b/src/vak/nn/loss/__init__.py @@ -1,6 +1,6 @@ from .dice import DiceLoss, dice_loss from .umap import UmapLoss, umap_loss -from .vae import VaeLoss, vae_loss +from .vae import VaeElboLoss, vae_elbo_loss __all__ = [ @@ -8,6 +8,6 @@ "dice_loss", "UmapLoss", "umap_loss", - "VaeLoss", - "vae_loss" + "VaeElboLoss", + "vae_elbo_loss" ] diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index e6c993a89..af533aa79 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -4,8 +4,9 @@ import math import torch import numpy as np -# vak.nn.loss.vae -def vae_loss( + + +def vae_elbo_loss( x: torch.Tensor, z: torch.Tensor, x_rec: torch.Tensor, @@ -25,7 +26,7 @@ def vae_loss( elbo = elbo + torch.sum(latent_dist.entropy()) return elbo -class VaeLoss(torch.nn.Module): +class VaeElboLoss(torch.nn.Module): """""" def __init__( @@ -47,7 +48,7 @@ def forward( latent_dist: torch.Tensor, ): x_shape = x.shape - elbo = vae_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) + elbo = vae_elbo_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) if self.return_latent_rec: return -elbo, z.detach().cpu().numpy(), \ x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy() From 68fd024edc172bc5d055186377c78490767715a5 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 08:31:50 -0400 Subject: [PATCH 015/150] Fix reference to 
Ava, VaeLoss -> VaeElboLoss in src/vak/models/ava.py --- src/vak/models/ava.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index b64c815c4..b4206003a 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -5,17 +5,17 @@ from .. import metrics, nets from .decorator import model from .vae_model import VAEModel -from ..nn.loss import VaeLoss +from ..nn.loss import VaeElboLoss @model(family=VAEModel) class AVA: """ """ - network = Ava - loss = VaeLoss + network = nets.Ava + loss = VaeElboLoss optimizer = torch.optim.Adam metrics = { - "loss": VaeLoss, + "loss": VaeElboLoss, "kl": torch.nn.functional.kl_div } default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file From d24bd5fb3e15f7dc8b1040e695990e0ba105869c Mon Sep 17 00:00:00 2001 From: MB Date: Sun, 8 Oct 2023 17:17:27 +0200 Subject: [PATCH 016/150] DEV: Added .vscode to gitignore, renamed Ava to AVA in nets and added torchmetrics.KLDivergence on vak.models.AVA Ensured that the notebook runs properly with no errors. --- .gitignore | 3 ++ src/vak/models/ava.py | 6 ++-- src/vak/nets/__init__.py | 4 +-- src/vak/nets/ava.py | 3 +- test_vae.ipynb | 78 ++++++++++++++++++++++++++++++++++++---- 5 files changed, 81 insertions(+), 13 deletions(-) diff --git a/.gitignore b/.gitignore index 77e8199f0..b1c557866 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ # IDE .idea +# VSCODE +.vscode + # Jupyter .ipynb_checkpoints/ *checkpoint.ipynb diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index b4206003a..27bb050a5 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -1,7 +1,7 @@ from __future__ import annotations import torch - +from torchmetrics import KLDivergence from .. 
import metrics, nets from .decorator import model from .vae_model import VAEModel @@ -11,11 +11,11 @@ class AVA: """ """ - network = nets.Ava + network = nets.AVA loss = VaeElboLoss optimizer = torch.optim.Adam metrics = { "loss": VaeElboLoss, - "kl": torch.nn.functional.kl_div + "kl": KLDivergence } default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file diff --git a/src/vak/nets/__init__.py b/src/vak/nets/__init__.py index dfd3f5db0..22f91d3ea 100644 --- a/src/vak/nets/__init__.py +++ b/src/vak/nets/__init__.py @@ -2,7 +2,7 @@ from .conv_encoder import ConvEncoder from .ed_tcn import ED_TCN from .tweetynet import TweetyNet -from .ava import Ava +from .ava import AVA __all__ = [ "conv_encoder", @@ -11,5 +11,5 @@ "ED_TCN", "tweetynet", "TweetyNet", - "Ava", + "AVA", ] diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 8c1f59178..045d61aa9 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -17,8 +17,7 @@ def __init__(self, dims): def forward(self, x): return self.layer(x) - -class Ava(nn.Module): +class AVA(nn.Module): """ """ def __init__( diff --git a/test_vae.ipynb b/test_vae.ipynb index 9084cb9d8..99cf2f0b9 100644 --- a/test_vae.ipynb +++ b/test_vae.ipynb @@ -2,26 +2,92 @@ "cells": [ { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/Users/mb/Library/Mobile Documents/com~apple~CloudDocs/gits/vak/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], "source": [ - "from src.vak.nets.ava import Ava\n", + "from src.vak.nets.ava import AVA\n", "import torch" ] }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 9, "metadata": {}, "outputs": [], "source": [ "x_shape = (3, 128, 512)\n", "input = torch.zeros(x_shape)\n", - "net = Ava(x_shape=(x_shape[1], x_shape[2]))\n", - "output, _ = net.forward(input)\n", + "net = AVA(x_shape=(x_shape[1], x_shape[2]))\n", + "output, *_ = net.forward(input)\n", "assert output.shape == x_shape, 'Error'" ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "(tensor([[[ 0.0208, -0.3291, 0.6882, ..., -0.0748, 0.4623, 0.3505],\n", + " [ 0.2156, -0.6989, -0.0076, ..., 0.6886, 0.7483, -0.0168],\n", + " [ 0.2800, -0.6187, -0.4511, ..., 0.2288, 0.0172, 0.2814],\n", + " ...,\n", + " [ 0.5600, -0.6822, -0.3365, ..., 0.4444, 0.8699, 0.2028],\n", + " [ 0.5519, 0.2093, -1.0595, ..., 0.1144, 1.1946, -0.0515],\n", + " [ 1.0651, -0.6337, -0.1924, ..., -0.6605, 0.0411, -0.4116]],\n", + " \n", + " [[-0.1745, -0.5648, 0.6807, ..., 0.5579, 0.3841, 0.1591],\n", + " [ 1.7102, -0.4414, -0.9502, ..., 0.4476, 0.2290, -0.1172],\n", + " [ 0.0379, -1.0228, -1.2022, ..., 0.2531, -0.0263, -0.0254],\n", + " ...,\n", + " [-0.3927, -0.5723, -0.3823, ..., 0.2884, 0.4463, 0.0611],\n", + " [-0.6336, 2.4851, 0.8446, ..., 0.0616, 0.8251, 0.0884],\n", + " [ 1.2549, -0.9335, -0.3954, ..., -0.3838, 0.1691, -0.4288]],\n", + " \n", + " [[ 0.2016, -0.4431, 0.7890, ..., 0.1360, 0.3042, 0.2461],\n", + " [ 0.2871, -0.9145, -1.0309, ..., -0.3036, 0.1962, 0.1196],\n", + " [-1.3255, 0.1292, -0.9483, ..., -0.2366, -0.0939, -0.1386],\n", + " ...,\n", + " [-0.6086, -0.3615, 0.3701, ..., 0.4176, 0.4484, 0.1601],\n", + " [-0.0056, 0.4527, 0.8480, ..., -0.0258, 0.8970, 0.0453],\n", + " [ 0.1551, -0.7235, 
0.1692, ..., -0.5865, -0.0339, -0.4343]]],\n", + " grad_fn=),\n", + " tensor([[ 0.5612, -0.2342, 1.4232, -1.2331, 1.9100, -1.6555, 0.0138, 0.0383,\n", + " 1.5190, -0.5999, -0.6995, 0.0797, -1.0059, -1.1228, 0.8876, -1.4480,\n", + " 0.9887, 0.3826, -0.1049, 0.7349, -0.1471, -1.6406, 0.9153, -0.8811,\n", + " -0.1230, 0.7922, -0.9518, -0.2269, 0.6329, -0.1100, -0.4484, -0.4431],\n", + " [ 0.5327, 1.1248, 0.6831, 0.1341, -0.3701, -0.0730, 0.6215, -0.8710,\n", + " 1.0366, -2.4440, -0.9182, -0.1998, -2.7722, 0.0281, -0.4384, 1.1796,\n", + " -1.8119, -0.1231, -0.2017, 0.6020, -0.4630, -1.2014, -1.8448, 1.0045,\n", + " 0.1432, 0.8541, -0.4479, 1.1177, -0.5499, -0.0604, -0.3624, -1.1774],\n", + " [-0.7375, 0.2821, -0.4936, -0.9686, -0.5220, 0.2299, -1.2951, -2.8860,\n", + " 0.1652, -1.1760, -0.1425, -0.3584, -0.3337, 0.2257, 0.6433, 0.9687,\n", + " 0.6157, -1.7252, 0.2653, 0.2703, 2.2893, -0.9009, 1.5419, -0.6750,\n", + " 0.2721, -0.5518, 0.8769, 1.4670, -1.2138, -0.3730, 1.1191, -0.6870]],\n", + " grad_fn=),\n", + " LowRankMultivariateNormal(loc: torch.Size([3, 32]), cov_factor: torch.Size([3, 32, 1]), cov_diag: torch.Size([3, 32])))" + ] + }, + "execution_count": 4, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "output" + ] } ], "metadata": { From 56dab3bb8913ba52fe6313b9188aae94810fcff3 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 18:40:15 -0400 Subject: [PATCH 017/150] Put .vscode under IDE in .gitignore --- .gitignore | 2 -- 1 file changed, 2 deletions(-) diff --git a/.gitignore b/.gitignore index b1c557866..839333dc3 100644 --- a/.gitignore +++ b/.gitignore @@ -3,8 +3,6 @@ # IDE .idea - -# VSCODE .vscode # Jupyter From 0e0588f96923895c6c5d33e3f0e055ffa2743e85 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 18:42:49 -0400 Subject: [PATCH 018/150] Remove test_vae.ipynb --- test_vae.ipynb | 115 ------------------------------------------------- 1 file changed, 115 deletions(-) delete mode 100644 
test_vae.ipynb diff --git a/test_vae.ipynb b/test_vae.ipynb deleted file mode 100644 index 99cf2f0b9..000000000 --- a/test_vae.ipynb +++ /dev/null @@ -1,115 +0,0 @@ -{ - "cells": [ - { - "cell_type": "code", - "execution_count": 1, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "/Users/mb/Library/Mobile Documents/com~apple~CloudDocs/gits/vak/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", - " from .autonotebook import tqdm as notebook_tqdm\n" - ] - } - ], - "source": [ - "from src.vak.nets.ava import AVA\n", - "import torch" - ] - }, - { - "cell_type": "code", - "execution_count": 9, - "metadata": {}, - "outputs": [], - "source": [ - "x_shape = (3, 128, 512)\n", - "input = torch.zeros(x_shape)\n", - "net = AVA(x_shape=(x_shape[1], x_shape[2]))\n", - "output, *_ = net.forward(input)\n", - "assert output.shape == x_shape, 'Error'" - ] - }, - { - "cell_type": "code", - "execution_count": 4, - "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(tensor([[[ 0.0208, -0.3291, 0.6882, ..., -0.0748, 0.4623, 0.3505],\n", - " [ 0.2156, -0.6989, -0.0076, ..., 0.6886, 0.7483, -0.0168],\n", - " [ 0.2800, -0.6187, -0.4511, ..., 0.2288, 0.0172, 0.2814],\n", - " ...,\n", - " [ 0.5600, -0.6822, -0.3365, ..., 0.4444, 0.8699, 0.2028],\n", - " [ 0.5519, 0.2093, -1.0595, ..., 0.1144, 1.1946, -0.0515],\n", - " [ 1.0651, -0.6337, -0.1924, ..., -0.6605, 0.0411, -0.4116]],\n", - " \n", - " [[-0.1745, -0.5648, 0.6807, ..., 0.5579, 0.3841, 0.1591],\n", - " [ 1.7102, -0.4414, -0.9502, ..., 0.4476, 0.2290, -0.1172],\n", - " [ 0.0379, -1.0228, -1.2022, ..., 0.2531, -0.0263, -0.0254],\n", - " ...,\n", - " [-0.3927, -0.5723, -0.3823, ..., 0.2884, 0.4463, 0.0611],\n", - " [-0.6336, 2.4851, 0.8446, ..., 0.0616, 0.8251, 0.0884],\n", - " [ 1.2549, -0.9335, -0.3954, ..., -0.3838, 0.1691, 
-0.4288]],\n", - " \n", - " [[ 0.2016, -0.4431, 0.7890, ..., 0.1360, 0.3042, 0.2461],\n", - " [ 0.2871, -0.9145, -1.0309, ..., -0.3036, 0.1962, 0.1196],\n", - " [-1.3255, 0.1292, -0.9483, ..., -0.2366, -0.0939, -0.1386],\n", - " ...,\n", - " [-0.6086, -0.3615, 0.3701, ..., 0.4176, 0.4484, 0.1601],\n", - " [-0.0056, 0.4527, 0.8480, ..., -0.0258, 0.8970, 0.0453],\n", - " [ 0.1551, -0.7235, 0.1692, ..., -0.5865, -0.0339, -0.4343]]],\n", - " grad_fn=),\n", - " tensor([[ 0.5612, -0.2342, 1.4232, -1.2331, 1.9100, -1.6555, 0.0138, 0.0383,\n", - " 1.5190, -0.5999, -0.6995, 0.0797, -1.0059, -1.1228, 0.8876, -1.4480,\n", - " 0.9887, 0.3826, -0.1049, 0.7349, -0.1471, -1.6406, 0.9153, -0.8811,\n", - " -0.1230, 0.7922, -0.9518, -0.2269, 0.6329, -0.1100, -0.4484, -0.4431],\n", - " [ 0.5327, 1.1248, 0.6831, 0.1341, -0.3701, -0.0730, 0.6215, -0.8710,\n", - " 1.0366, -2.4440, -0.9182, -0.1998, -2.7722, 0.0281, -0.4384, 1.1796,\n", - " -1.8119, -0.1231, -0.2017, 0.6020, -0.4630, -1.2014, -1.8448, 1.0045,\n", - " 0.1432, 0.8541, -0.4479, 1.1177, -0.5499, -0.0604, -0.3624, -1.1774],\n", - " [-0.7375, 0.2821, -0.4936, -0.9686, -0.5220, 0.2299, -1.2951, -2.8860,\n", - " 0.1652, -1.1760, -0.1425, -0.3584, -0.3337, 0.2257, 0.6433, 0.9687,\n", - " 0.6157, -1.7252, 0.2653, 0.2703, 2.2893, -0.9009, 1.5419, -0.6750,\n", - " 0.2721, -0.5518, 0.8769, 1.4670, -1.2138, -0.3730, 1.1191, -0.6870]],\n", - " grad_fn=),\n", - " LowRankMultivariateNormal(loc: torch.Size([3, 32]), cov_factor: torch.Size([3, 32, 1]), cov_diag: torch.Size([3, 32])))" - ] - }, - "execution_count": 4, - "metadata": {}, - "output_type": "execute_result" - } - ], - "source": [ - "output" - ] - } - ], - "metadata": { - "kernelspec": { - "display_name": "Python 3", - "language": "python", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": 
"ipython3", - "version": "3.10.13" - }, - "orig_nbformat": 4 - }, - "nbformat": 4, - "nbformat_minor": 2 -} From 077bf990e43e57b0a8fb54d983cdf5c7ba01171d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 19:36:35 -0400 Subject: [PATCH 019/150] Uncomment from __future__ import annotations in vak/nets/ava.py --- src/vak/nets/ava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 045d61aa9..840a58c7c 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,4 +1,4 @@ -# from __future__ import annotations +from __future__ import annotations import torch from torch import nn From 6e118fde394262e74f8dfa94483bcad6df5c49a2 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 19:37:08 -0400 Subject: [PATCH 020/150] Remove `src` from comment in vak/nets/ava.py --- src/vak/nets/ava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 840a58c7c..c4917649f 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -5,7 +5,7 @@ from torch.distributions import LowRankMultivariateNormal from typing import Tuple -# Is it necessary to put this in src.vak.nn.modules? +# Is it necessary to put this in vak.nn.modules? 
class BottleneckLayer(nn.Module): def __init__(self, dims): super().__init__() From 6b4d16f50b7539c37617b95c6fb4defbad94e070 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 19:37:37 -0400 Subject: [PATCH 021/150] Use network attributes 'encoder' and 'decoder' in VAEModel --- src/vak/models/vae_model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index edc80f2bd..102d7f834 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -26,18 +26,16 @@ def __init__( super().__init__( network=network, loss=loss, optimizer=optimizer, metrics=metrics ) - self.encoder = network['encode'] - self.decoder = network['decode'] def forward(self, x): out, _ = self.network(x) return out def encode(self, x): - return self.encoder(x) + return self.network.encoder(x) def decode(self, x): - return self.decoder(x) + return self.network.decoder(x) def configure_optimizers(self): return self.optimizer From b26766ad9dd96bd6be82bf7d4a41ef606382a788 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 19:54:12 -0400 Subject: [PATCH 022/150] Modify AVA network to have input_shape parameter --- src/vak/nets/ava.py | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index c4917649f..d1e3a043d 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,9 +1,9 @@ from __future__ import annotations +import numpy as np import torch from torch import nn from torch.distributions import LowRankMultivariateNormal -from typing import Tuple # Is it necessary to put this in vak.nn.modules? 
class BottleneckLayer(nn.Module): @@ -17,27 +17,27 @@ def __init__(self, dims): def forward(self, x): return self.layer(x) + class AVA(nn.Module): """ """ def __init__( self, - hidden_dims: Tuple[int] = (8, 8, 16, 16, 24, 24), - fc_dims: Tuple[int] = (1024, 256, 64), + hidden_dims: tuple[int] = (8, 8, 16, 16, 24, 24), + fc_dims: tuple[int] = (1024, 256, 64), z_dim: int = 32, - in_channels: int = 1, - x_shape: Tuple[int] = (128, 128) + input_shape: tuple[int] = (1, 128, 128), ): """ """ super().__init__() fc_dims = (*fc_dims, z_dim) hidden_dims = (*hidden_dims, z_dim) - - self.in_channels = in_channels + + self.in_channels = input_shape[0] self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) - self.x_shape = torch.tensor(x_shape) - self.x_dim = torch.prod(self.x_shape) + self.input_shape = input_shape + self.x_dim = np.prod(self.input_shape[1:]) self.in_fc = int(self.x_dim / 2) in_fc = self.in_fc modules = [] @@ -53,7 +53,7 @@ def __init__( in_channels = h_dim self.encoder = nn.Sequential(*modules) - + modules = [] for fc_dim in fc_dims[:-2]: modules.append( @@ -77,7 +77,7 @@ def __init__( ) self.decoder_bottleneck = nn.Sequential(*modules) hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) - hidden_dims + modules = [] for i, h_dim in enumerate(hidden_dims): stride = 2 if h_dim == in_channels else 1 From 9425df223c2c2dfda5baf72344f9584ea22c3c3c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 19:54:35 -0400 Subject: [PATCH 023/150] Modify vak.models.get to handle VAEModel --- src/vak/models/get.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/vak/models/get.py b/src/vak/models/get.py index e4cc3ab06..91af9b860 100644 --- a/src/vak/models/get.py +++ b/src/vak/models/get.py @@ -97,6 +97,14 @@ def get( config["network"]["encoder"] = dict(input_shape=input_shape) model = model_class.from_config(config=config) + elif model_family == "VAEModel": + net_init_params = list( + inspect.signature( + 
model_class.definition.network.__init__ + ).parameters.keys() + ) + if "input_shape" in net_init_params: + config["network"]["input_shape"] = input_shape else: raise ValueError( f"Value for ``model_family`` not recognized: {model_family}" From 7c600d6e36057332de5469fe0f67294dbdff88f0 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 20:02:52 -0400 Subject: [PATCH 024/150] Add variable + comments in nets.AVA.__init__ --- src/vak/nets/ava.py | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index d1e3a043d..dd5281858 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -40,6 +40,9 @@ def __init__( self.x_dim = np.prod(self.input_shape[1:]) self.in_fc = int(self.x_dim / 2) in_fc = self.in_fc + + # ---- build encoder + in_channels = self.in_channels modules = [] for h_dim in hidden_dims: stride = 2 if h_dim == in_channels else 1 @@ -54,6 +57,7 @@ def __init__( self.encoder = nn.Sequential(*modules) + # ---- build encoder bottleneck modules = [] for fc_dim in fc_dims[:-2]: modules.append( @@ -63,9 +67,12 @@ def __init__( ) in_fc = fc_dim self.encoder_bottleneck = nn.Sequential(*modules) + self.mu_layer = BottleneckLayer(fc_dims[-3:]) self.cov_factor_layer = BottleneckLayer(fc_dims[-3:]) self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) + + # ---- build decoder bottleneck fc_dims = fc_dims[::-1] modules = [] for i in range(len(fc_dims)): @@ -76,8 +83,9 @@ def __init__( nn.ReLU()) ) self.decoder_bottleneck = nn.Sequential(*modules) - hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) + # ---- build decoder + hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) modules = [] for i, h_dim in enumerate(hidden_dims): stride = 2 if h_dim == in_channels else 1 From a4579f82101370cc513788579a2e30392487ea4d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 20:03:05 -0400 Subject: [PATCH 025/150] Fix how we handle VAEModel in vak.models.get --- 
src/vak/models/get.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vak/models/get.py b/src/vak/models/get.py index 91af9b860..8d1ba9480 100644 --- a/src/vak/models/get.py +++ b/src/vak/models/get.py @@ -105,6 +105,8 @@ def get( ) if "input_shape" in net_init_params: config["network"]["input_shape"] = input_shape + + model = model_class.from_config(config=config) else: raise ValueError( f"Value for ``model_family`` not recognized: {model_family}" From 1c529e5053b6d870b1734a2300ec2f28a8fce74e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 20:04:49 -0400 Subject: [PATCH 026/150] Fix how VAEModel unpacks 'x' in training_step and validation_step --- src/vak/models/vae_model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index 102d7f834..72b320299 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -52,8 +52,7 @@ def training_step(self, batch: tuple, batch_idx: int): def training_step(self, batch: tuple, batch_idx: int): """ """ - x = batch[0] - x = batch[0] + x = batch["x"] out, _ = self.network(x) z, latent_dist = itemgetter('z', 'latent_dist')(_) loss = self.loss(x, z, out, latent_dist) @@ -61,8 +60,7 @@ def training_step(self, batch: tuple, batch_idx: int): return loss def validation_step(self, batch: tuple, batch_idx: int): - x = batch["frames"] - x = batch[0] + x = batch["x"] out, _ = self.network(x) z, latent_dist = itemgetter('z', 'latent_dist')(_) for metric_name, metric_callable in self.metrics.items(): From d3b6220e13892e1cea2d6d1d88125ab672f3089a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 18 Oct 2023 20:07:05 -0400 Subject: [PATCH 027/150] Add train-ava.ipynb --- train-ava.ipynb | 365 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 365 insertions(+) create mode 100644 train-ava.ipynb diff --git a/train-ava.ipynb b/train-ava.ipynb new file mode 100644 index 000000000..abe21ef16 --- 
/dev/null +++ b/train-ava.ipynb @@ -0,0 +1,365 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": null, + "id": "c88ee0dc-7579-40af-9c5d-68bef9b30c49", + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "/home/pimienta/Documents/repos/coding/vocalpy/vak-vocalpy/.venv/lib/python3.10/site-packages/tqdm/auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", + " from .autonotebook import tqdm as notebook_tqdm\n" + ] + } + ], + "source": [ + "from __future__ import annotations\n", + "\n", + "import datetime\n", + "import logging\n", + "import pathlib\n", + "\n", + "import numpy as np\n", + "import pandas as pd\n", + "import pytorch_lightning as lightning\n", + "import torch.utils.data\n", + "\n", + "from src import vak" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "190f11c3-c115-408e-92de-3d87cd421748", + "metadata": {}, + "outputs": [], + "source": [ + "def get_split_dur(df: pd.DataFrame, split: str) -> float:\n", + " \"\"\"Get duration of a split in a dataset from a pandas DataFrame representing the dataset.\"\"\"\n", + " return df[df[\"split\"] == split][\"duration\"].sum()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "166b6c26-ea53-46cc-82df-0f3057c801b8", + "metadata": {}, + "outputs": [], + "source": [ + "def get_trainer(\n", + " max_epochs: int,\n", + " ckpt_root: str | pathlib.Path,\n", + " ckpt_step: int,\n", + " log_save_dir: str | pathlib.Path,\n", + " device: str = \"cuda\",\n", + ") -> lightning.Trainer:\n", + " \"\"\"Returns an instance of ``lightning.Trainer``\n", + " with a default set of callbacks.\n", + " Used by ``vak.core`` functions.\"\"\"\n", + " # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691\n", + " if device == \"cuda\":\n", + " accelerator = \"gpu\"\n", + " else:\n", + " accelerator = \"auto\"\n", + "\n", 
+ " ckpt_callback = lightning.callbacks.ModelCheckpoint(\n", + " dirpath=ckpt_root,\n", + " filename=\"checkpoint\",\n", + " every_n_train_steps=ckpt_step,\n", + " save_last=True,\n", + " verbose=True,\n", + " )\n", + " ckpt_callback.CHECKPOINT_NAME_LAST = \"checkpoint\"\n", + " ckpt_callback.FILE_EXTENSION = \".pt\"\n", + "\n", + " val_ckpt_callback = lightning.callbacks.ModelCheckpoint(\n", + " monitor=\"val_loss\",\n", + " dirpath=ckpt_root,\n", + " save_top_k=1,\n", + " mode=\"min\",\n", + " filename=\"min-val-loss-checkpoint\",\n", + " auto_insert_metric_name=False,\n", + " verbose=True,\n", + " )\n", + " val_ckpt_callback.FILE_EXTENSION = \".pt\"\n", + "\n", + " callbacks = [\n", + " ckpt_callback,\n", + " val_ckpt_callback,\n", + " ]\n", + "\n", + " logger = lightning.loggers.TensorBoardLogger(save_dir=log_save_dir)\n", + "\n", + " trainer = lightning.Trainer(\n", + " max_epochs=max_epochs,\n", + " accelerator=accelerator,\n", + " logger=logger,\n", + " callbacks=callbacks,\n", + " )\n", + " return trainer" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fcf49554-27a4-4baf-b5ae-a2fb9288f4e4", + "metadata": {}, + "outputs": [], + "source": [ + "class SpectrogramPipe(torch.utils.data.Dataset):\n", + " \"\"\"Pipeline for loading samples from a dataset of spectrograms\n", + " \n", + " This is a simplified version of ``vak.datasets.parametric_umap.ParametricUmapInferenceDataset``.\n", + " \"\"\"\n", + " def __init__(\n", + " self,\n", + " data: npt.NDArray,\n", + " dataset_df: pd.DataFrame,\n", + " transform: Callable | None = None,\n", + " ):\n", + " self.data = data\n", + " self.dataset_df = dataset_df\n", + " self.transform = transform\n", + "\n", + " @property\n", + " def duration(self):\n", + " return self.dataset_df[\"duration\"].sum()\n", + "\n", + " def __len__(self):\n", + " return self.data.shape[0]\n", + "\n", + " @property\n", + " def shape(self):\n", + " tmp_x_ind = 0\n", + " tmp_item = self.__getitem__(tmp_x_ind)\n", + " 
return tmp_item[\"x\"].shape\n", + "\n", + " def __getitem__(self, index):\n", + " x = self.data[index]\n", + " df_index = self.dataset_df.index[index]\n", + " if self.transform:\n", + " x = self.transform(x)\n", + " return {\"x\": x, \"df_index\": df_index}\n", + "\n", + " @classmethod\n", + " def from_dataset_path(\n", + " cls,\n", + " dataset_path: str | pathlib.Path,\n", + " split: str,\n", + " transform: Callable | None = None,\n", + " ):\n", + " import vak.datasets # import here just to make classmethod more explicit\n", + "\n", + " dataset_path = pathlib.Path(dataset_path)\n", + " metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path(\n", + " dataset_path\n", + " )\n", + "\n", + " dataset_csv_path = dataset_path / metadata.dataset_csv_filename\n", + " dataset_df = pd.read_csv(dataset_csv_path)\n", + " split_df = dataset_df[dataset_df.split == split]\n", + "\n", + " data = np.stack(\n", + " [\n", + " np.load(dataset_path / spect_path)\n", + " for spect_path in split_df.spect_path.values\n", + " ]\n", + " )\n", + " return cls(\n", + " data,\n", + " split_df,\n", + " transform=transform,\n", + " )" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "f1c9c5bb-0b94-4649-80f7-db3185e9b480", + "metadata": {}, + "outputs": [], + "source": [ + "dataset_path = pathlib.Path(\n", + " './tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/ConvEncoderUMAP/032312-vak-dimensionality-reduction-dataset-generated-231010_165846/'\n", + ")\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7b1a3d7b-dd9c-4977-9de1-bc31049a06ca", + "metadata": {}, + "outputs": [], + "source": [ + "metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path(\n", + " dataset_path\n", + ")\n", + "dataset_csv_path = dataset_path / metadata.dataset_csv_filename\n", + "dataset_df = pd.read_csv(dataset_csv_path)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "565cdc37-738f-42a1-b65d-4348fd567d99", + 
"metadata": {}, + "outputs": [], + "source": [ + "val_step = 2000" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3d4a4216-ebba-40ea-ab26-8a5d88211c4d", + "metadata": {}, + "outputs": [], + "source": [ + "results_path = pathlib.Path(\n", + " './tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/AVA'\n", + ")\n", + "results_path.mkdir(exist_ok=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "0f0ac2fb-fa28-4645-8397-50f5cc1c6bb8", + "metadata": {}, + "outputs": [], + "source": [ + "# ---------------- load training data -----------------------------------------------------------------------------\n", + "\n", + "# below, if we're going to train network to predict unlabeled segments, then\n", + "# we need to include a class for those unlabeled segments in labelmap,\n", + "# the mapping from labelset provided by user to a set of consecutive\n", + "# integers that the network learns to predict\n", + "train_dur = get_split_dur(dataset_df, \"train\")\n", + "print(\n", + " f\"Total duration of training split from dataset (in s): {train_dur}\",\n", + ")\n", + "\n", + "\n", + "train_transform_params = {}\n", + "transform = vak.transforms.defaults.get_default_transform(\n", + " \"ConvEncoderUMAP\", \"train\", train_transform_params\n", + ")\n", + "\n", + "\n", + "train_dataset_params = {}\n", + "train_dataset = SpectrogramPipe.from_dataset_path(\n", + " dataset_path=dataset_path,\n", + " split=\"train\",\n", + " transform=transform,\n", + " **train_dataset_params,\n", + ")\n", + "\n", + "train_loader = torch.utils.data.DataLoader(\n", + " dataset=train_dataset,\n", + " shuffle=True,\n", + " batch_size=64,\n", + " num_workers=16,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "925e06c9-91f8-4614-b56d-56b07e12f564", + "metadata": {}, + "outputs": [], + "source": [ + "# ---------------- load validation set (if there is one) 
-----------------------------------------------------------\n", + "\n", + "\n", + "val_transform_params = {}\n", + "transform = vak.transforms.defaults.get_default_transform(\n", + " \"ConvEncoderUMAP\", \"eval\", val_transform_params\n", + ")\n", + "val_dataset_params = {}\n", + "val_dataset = SpectrogramPipe.from_dataset_path(\n", + " dataset_path=dataset_path,\n", + " split=\"val\",\n", + " transform=transform,\n", + " **val_dataset_params,\n", + ")\n", + "print(\n", + " f\"Duration of ParametricUMAPDataset used for validation, in seconds: {val_dataset.duration}\",\n", + ")\n", + "val_loader = torch.utils.data.DataLoader(\n", + " dataset=val_dataset,\n", + " shuffle=False,\n", + " batch_size=64,\n", + " num_workers=16,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "6cb39ce3-72db-4b53-b4b9-a08b449b37d4", + "metadata": {}, + "outputs": [], + "source": [ + "device = vak.common.device.get_default()\n", + "\n", + "model = vak.models.get(\n", + " \"AVA\",\n", + " config={\"network\": {}, \"optimizer\": {\"lr\": 0.001}},\n", + " input_shape=train_dataset.shape,\n", + ")\n", + "\n", + "results_model_root = results_path.joinpath(\"AVA\")\n", + "results_model_root.mkdir(exist_ok=True)\n", + "ckpt_root = results_model_root.joinpath(\"checkpoints\")\n", + "ckpt_root.mkdir(exist_ok=True)\n", + "\n", + "trainer = get_trainer(\n", + " max_epochs=50,\n", + " log_save_dir=results_model_root,\n", + " device=device,\n", + " ckpt_root=ckpt_root,\n", + " ckpt_step=250,\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "fe993c8b-4571-4892-b645-91d778df2fb6", + "metadata": {}, + "outputs": [], + "source": [ + "trainer.fit(\n", + " model=model,\n", + " train_dataloaders=train_loader,\n", + " val_dataloaders=val_loader,\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.12" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} From 60a9b8bfd0a24a9c2dbd32b2bc958d3fc3bf6677 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 29 Sep 2023 09:06:10 -0400 Subject: [PATCH 028/150] Move test_vae.ipynb to src/scripts --- src/scripts/test_vae.ipynb | 49 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 49 insertions(+) create mode 100644 src/scripts/test_vae.ipynb diff --git a/src/scripts/test_vae.ipynb b/src/scripts/test_vae.ipynb new file mode 100644 index 000000000..9084cb9d8 --- /dev/null +++ b/src/scripts/test_vae.ipynb @@ -0,0 +1,49 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [], + "source": [ + "from src.vak.nets.ava import Ava\n", + "import torch" + ] + }, + { + "cell_type": "code", + "execution_count": 8, + "metadata": {}, + "outputs": [], + "source": [ + "x_shape = (3, 128, 512)\n", + "input = torch.zeros(x_shape)\n", + "net = Ava(x_shape=(x_shape[1], x_shape[2]))\n", + "output, _ = net.forward(input)\n", + "assert output.shape == x_shape, 'Error'" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + }, + "orig_nbformat": 4 + }, + "nbformat": 4, + "nbformat_minor": 2 +} From fc61d1052928d16a1fb277eb992d44da46ad80a6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 20 Nov 2023 19:49:11 -0500 Subject: [PATCH 029/150] Have nets.Ava use input_shape parameter, revise code slightly --- src/vak/nets/ava.py | 51 
+++++++++++++++++++++++++-------------------- 1 file changed, 28 insertions(+), 23 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index dd5281858..f4400ac0c 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -5,7 +5,7 @@ from torch import nn from torch.distributions import LowRankMultivariateNormal -# Is it necessary to put this in vak.nn.modules? + class BottleneckLayer(nn.Module): def __init__(self, dims): super().__init__() @@ -18,15 +18,15 @@ def forward(self, x): return self.layer(x) -class AVA(nn.Module): +class Ava(nn.Module): """ """ def __init__( self, + input_shape: tuple[int] = (1, 128, 128), hidden_dims: tuple[int] = (8, 8, 16, 16, 24, 24), fc_dims: tuple[int] = (1024, 256, 64), z_dim: int = 32, - input_shape: tuple[int] = (1, 128, 128), ): """ """ @@ -34,36 +34,40 @@ def __init__( fc_dims = (*fc_dims, z_dim) hidden_dims = (*hidden_dims, z_dim) + self.input_shape = input_shape self.in_channels = input_shape[0] self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) - self.input_shape = input_shape - self.x_dim = np.prod(self.input_shape[1:]) + self.x_shape = input_shape[1:] + self.x_dim = np.prod(self.x_shape) self.in_fc = int(self.x_dim / 2) - in_fc = self.in_fc # ---- build encoder - in_channels = self.in_channels modules = [] + in_channels = self.in_channels for h_dim in hidden_dims: stride = 2 if h_dim == in_channels else 1 modules.append( nn.Sequential( nn.BatchNorm2d(in_channels), - nn.Conv2d(in_channels, out_channels=h_dim, - kernel_size=3, stride=stride, padding=1), - nn.ReLU()) + nn.Conv2d( + in_channels, out_channels=h_dim, + kernel_size=3, stride=stride, padding=1 + ), + nn.ReLU() + ) ) in_channels = h_dim - self.encoder = nn.Sequential(*modules) # ---- build encoder bottleneck modules = [] + in_fc = self.in_fc for fc_dim in fc_dims[:-2]: modules.append( nn.Sequential( nn.Linear(in_fc, fc_dim), - nn.ReLU()) + nn.ReLU() + ) ) in_fc = fc_dim self.encoder_bottleneck = nn.Sequential(*modules) @@ 
-73,37 +77,39 @@ def __init__( self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) # ---- build decoder bottleneck - fc_dims = fc_dims[::-1] modules = [] + fc_dims = fc_dims[::-1] for i in range(len(fc_dims)): out = self.in_fc if i == len(fc_dims) - 1 else fc_dims[i+1] modules.append( nn.Sequential( nn.Linear(fc_dims[i], out), - nn.ReLU()) + nn.ReLU() + ) ) self.decoder_bottleneck = nn.Sequential(*modules) # ---- build decoder - hidden_dims = ( *hidden_dims[-2::-1], self.in_channels) modules = [] + hidden_dims = (*hidden_dims[-2::-1], self.in_channels) for i, h_dim in enumerate(hidden_dims): stride = 2 if h_dim == in_channels else 1 output_padding = 1 if h_dim == in_channels else 0 - layers = [ nn.BatchNorm2d(in_channels), - nn.ConvTranspose2d(in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1, output_padding=output_padding)] + layers = [nn.BatchNorm2d(in_channels), + nn.ConvTranspose2d( + in_channels, out_channels=h_dim, + kernel_size=3, stride=stride, padding=1, output_padding=output_padding + )] if i != len(hidden_dims) - 1: layers.append(nn.ReLU()) - - modules.append( nn.Sequential(*layers) ) + modules.append(nn.Sequential(*layers)) in_channels = h_dim - self.decoder = nn.Sequential(*modules) def encode(self, x): """ """ - x = self.encoder(x.unsqueeze(self.in_channels)).view(-1, self.in_fc) + x = self.encoder(x.view(-1, self.in_fc)) x = self.encoder_bottleneck(x) mu = self.mu_layer(x) cov_factor = self.cov_factor_layer(x).unsqueeze(-1) @@ -117,7 +123,7 @@ def decode(self, z): z = self.decoder_bottleneck(z).view(-1, self.fc_view[0], self.fc_view[1], self.fc_view[2]) z = self.decoder(z).view(-1, self.x_dim) return z - + @staticmethod def reparametrize(mu, cov_factor, cov_diag): latent_dist = LowRankMultivariateNormal(mu, cov_factor, cov_diag) @@ -128,4 +134,3 @@ def forward(self, x): z, latent_dist = self.encode(x) x_rec = self.decode(z).view(-1, self.x_shape[0], self.x_shape[1]) return x_rec, z, latent_dist - From 
70c0c13716b65f4305a8a01f484b1f6c56ab2199 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 21 Nov 2023 07:56:11 -0500 Subject: [PATCH 030/150] (Re)capitalize AVA in vak/nets/ava.py --- src/vak/nets/ava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index f4400ac0c..83fc4875f 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -18,7 +18,7 @@ def forward(self, x): return self.layer(x) -class Ava(nn.Module): +class AVA(nn.Module): """ """ def __init__( From 69ba2611643d73688c7c3e5ff448ec96ca59d8d5 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 21 Nov 2023 08:48:35 -0500 Subject: [PATCH 031/150] Rewrite AVA.__init__ to use `encoder_channels` + `out_channels` instead of `hidden_dims` --- src/vak/nets/ava.py | 25 +++++++++++++------------ 1 file changed, 13 insertions(+), 12 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 83fc4875f..d71718671 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -24,7 +24,7 @@ class AVA(nn.Module): def __init__( self, input_shape: tuple[int] = (1, 128, 128), - hidden_dims: tuple[int] = (8, 8, 16, 16, 24, 24), + encoder_channels: tuple[int] = (8, 8, 16, 16, 24, 24), fc_dims: tuple[int] = (1024, 256, 64), z_dim: int = 32, ): @@ -32,7 +32,7 @@ def __init__( """ super().__init__() fc_dims = (*fc_dims, z_dim) - hidden_dims = (*hidden_dims, z_dim) + encoder_channels = (*encoder_channels, z_dim) self.input_shape = input_shape self.in_channels = input_shape[0] @@ -44,19 +44,20 @@ def __init__( # ---- build encoder modules = [] in_channels = self.in_channels - for h_dim in hidden_dims: - stride = 2 if h_dim == in_channels else 1 + for out_channels in encoder_channels: + # AVA uses stride=2 when out_channels == in_channels + stride = 2 if out_channels == in_channels else 1 modules.append( nn.Sequential( nn.BatchNorm2d(in_channels), nn.Conv2d( - in_channels, out_channels=h_dim, + in_channels, out_channels, kernel_size=3, 
stride=stride, padding=1 ), nn.ReLU() ) ) - in_channels = h_dim + in_channels = out_channels self.encoder = nn.Sequential(*modules) # ---- build encoder bottleneck @@ -91,19 +92,19 @@ def __init__( # ---- build decoder modules = [] - hidden_dims = (*hidden_dims[-2::-1], self.in_channels) - for i, h_dim in enumerate(hidden_dims): - stride = 2 if h_dim == in_channels else 1 - output_padding = 1 if h_dim == in_channels else 0 + decoder_channels = (*encoder_channels[-2::-1], self.in_channels) + for i, out_channels in enumerate(decoder_channels): + stride = 2 if out_channels == in_channels else 1 + output_padding = 1 if out_channels == in_channels else 0 layers = [nn.BatchNorm2d(in_channels), nn.ConvTranspose2d( in_channels, out_channels=h_dim, kernel_size=3, stride=stride, padding=1, output_padding=output_padding )] - if i != len(hidden_dims) - 1: + if i != len(decoder_channels) - 1: layers.append(nn.ReLU()) modules.append(nn.Sequential(*layers)) - in_channels = h_dim + in_channels = out_channels self.decoder = nn.Sequential(*modules) def encode(self, x): From e6d0447b3a5fd9935188c99676c197bf06cda80d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 21 Nov 2023 08:49:20 -0500 Subject: [PATCH 032/150] Use torch.flatten in AVA.encode --- src/vak/nets/ava.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index d71718671..90c1773cb 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -110,7 +110,8 @@ def __init__( def encode(self, x): """ """ - x = self.encoder(x.view(-1, self.in_fc)) + x = self.encoder(x) + x = torch.flatten(x, start_dim=1) x = self.encoder_bottleneck(x) mu = self.mu_layer(x) cov_factor = self.cov_factor_layer(x).unsqueeze(-1) From 94b74870391201502f6bfb1bf52453c3ae2e94f4 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 22 Nov 2023 12:53:31 -0500 Subject: [PATCH 033/150] Further revise src/vak/nets/ava.py --- src/vak/nets/ava.py | 82 
+++++++++++++++++++++++++-------------------- 1 file changed, 45 insertions(+), 37 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 90c1773cb..98e6bddfb 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,18 +1,20 @@ from __future__ import annotations +from typing import Sequence + import numpy as np import torch from torch import nn from torch.distributions import LowRankMultivariateNormal -class BottleneckLayer(nn.Module): - def __init__(self, dims): +class FullyConnectedLayers(nn.Module): + def __init__(self, n_features: Sequence[int]): super().__init__() self.layer = nn.Sequential( - nn.Linear(dims[0], dims[1]), + nn.Linear(n_features[0], n_features[1]), nn.ReLU(), - nn.Linear(dims[1], dims[2])) + nn.Linear(n_features[1], n_features[2])) def forward(self, x): return self.layer(x) @@ -23,23 +25,19 @@ class AVA(nn.Module): """ def __init__( self, - input_shape: tuple[int] = (1, 128, 128), - encoder_channels: tuple[int] = (8, 8, 16, 16, 24, 24), - fc_dims: tuple[int] = (1024, 256, 64), + input_shape: Sequence[int] = (1, 128, 128), + encoder_channels: Sequence[int] = (8, 8, 16, 16, 24, 24, 32), + fc_dims: Sequence[int] = (1024, 256, 64), z_dim: int = 32, ): """ """ super().__init__() - fc_dims = (*fc_dims, z_dim) - encoder_channels = (*encoder_channels, z_dim) self.input_shape = input_shape self.in_channels = input_shape[0] - self.fc_view = (int(fc_dims[-1]),int(fc_dims[-1]/2),int(fc_dims[-1]/2)) - self.x_shape = input_shape[1:] + self.x_shape = input_shape[1:] # channels * hide * width self.x_dim = np.prod(self.x_shape) - self.in_fc = int(self.x_dim / 2) # ---- build encoder modules = [] @@ -60,48 +58,58 @@ def __init__( in_channels = out_channels self.encoder = nn.Sequential(*modules) - # ---- build encoder bottleneck + # we compute shapes dynamically to make code more general + # we could compute this using equations for conv shape etc. 
to avoid running tensor through encoder + dummy_inp = torch.rand(1, *input_shape) + out = self.encoder(dummy_inp) + self.fc_view = tuple(out.shape[1:]) + out = torch.flatten(out, start_dim=1) + self.in_fc_dims = out.shape[1] + + # ---- build shared fully-connected layers of encoder modules = [] - in_fc = self.in_fc - for fc_dim in fc_dims[:-2]: + in_features = self.in_fc_dims + for out_features in fc_dims[:-1]: modules.append( nn.Sequential( - nn.Linear(in_fc, fc_dim), + nn.Linear(in_features, out_features), nn.ReLU() ) ) - in_fc = fc_dim - self.encoder_bottleneck = nn.Sequential(*modules) + in_features = out_features + self.shared_encoder_fc = nn.Sequential(*modules) - self.mu_layer = BottleneckLayer(fc_dims[-3:]) - self.cov_factor_layer = BottleneckLayer(fc_dims[-3:]) - self.cov_diag_layer = BottleneckLayer(fc_dims[-3:]) + fc_features = (*fc_dims[-2:], z_dim) + self.mu_fc = FullyConnectedLayers(fc_features) + self.cov_factor_fc = FullyConnectedLayers(fc_features) + self.cov_diag_fc = FullyConnectedLayers(fc_features) - # ---- build decoder bottleneck + # ---- build fully-connected layers of decoder modules = [] - fc_dims = fc_dims[::-1] - for i in range(len(fc_dims)): - out = self.in_fc if i == len(fc_dims) - 1 else fc_dims[i+1] + decoder_dims = (*reversed(fc_dims), self.in_fc_dims) + in_features = z_dim + for i, out_features in enumerate(decoder_dims): modules.append( nn.Sequential( - nn.Linear(fc_dims[i], out), + nn.Linear(in_features, out_features), nn.ReLU() ) ) - self.decoder_bottleneck = nn.Sequential(*modules) + self.decoder_fc = nn.Sequential(*modules) # ---- build decoder modules = [] - decoder_channels = (*encoder_channels[-2::-1], self.in_channels) + decoder_channels = (*reversed(encoder_channels[:-1]), self.in_channels) + in_channels = encoder_channels[-1] for i, out_channels in enumerate(decoder_channels): stride = 2 if out_channels == in_channels else 1 output_padding = 1 if out_channels == in_channels else 0 layers = [nn.BatchNorm2d(in_channels), 
nn.ConvTranspose2d( - in_channels, out_channels=h_dim, + in_channels, out_channels, kernel_size=3, stride=stride, padding=1, output_padding=output_padding )] - if i != len(decoder_channels) - 1: + if i < len(decoder_channels) - 1: layers.append(nn.ReLU()) modules.append(nn.Sequential(*layers)) in_channels = out_channels @@ -112,18 +120,18 @@ def encode(self, x): """ x = self.encoder(x) x = torch.flatten(x, start_dim=1) - x = self.encoder_bottleneck(x) - mu = self.mu_layer(x) - cov_factor = self.cov_factor_layer(x).unsqueeze(-1) - cov_diag = torch.exp(self.cov_diag_layer(x)) + x = self.shared_encoder_fc(x) + mu = self.mu_fc(x) + cov_factor = self.cov_factor_fc(x).unsqueeze(-1) # Last dimension is rank \Sigma = 1 + cov_diag = torch.exp(self.cov_diag_fc(x)) # cov_diag must be positive z, latent_dist = self.reparametrize(mu, cov_factor, cov_diag) return z, latent_dist def decode(self, z): """ """ - z = self.decoder_bottleneck(z).view(-1, self.fc_view[0], self.fc_view[1], self.fc_view[2]) - z = self.decoder(z).view(-1, self.x_dim) + z = self.decoder_fc(z).view(-1, *self.fc_view) + z = self.decoder(z).view(-1, *self.x_dim) return z @staticmethod @@ -134,5 +142,5 @@ def reparametrize(mu, cov_factor, cov_diag): def forward(self, x): z, latent_dist = self.encode(x) - x_rec = self.decode(z).view(-1, self.x_shape[0], self.x_shape[1]) + x_rec = self.decode(z) return x_rec, z, latent_dist From b2b854d8d25913c77ff34d4c5e38c8657855af99 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 22 Nov 2023 12:53:41 -0500 Subject: [PATCH 034/150] Fix default learning rate in src/vak/models/ava.py --- src/vak/models/ava.py | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index 27bb050a5..d7a5ce976 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -1,15 +1,26 @@ +"""Autoencoded Vocal Analysis (AVA) model [1]_. + +.. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). 
+ Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 +""" from __future__ import annotations import torch from torchmetrics import KLDivergence -from .. import metrics, nets +from .. import nets from .decorator import model from .vae_model import VAEModel from ..nn.loss import VaeElboLoss + @model(family=VAEModel) class AVA: - """ + """Autoencoded Vocal Analysis (AVA) model [1]_. + + .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 """ network = nets.AVA loss = VaeElboLoss @@ -18,4 +29,4 @@ class AVA: "loss": VaeElboLoss, "kl": KLDivergence } - default_config = {"optimizer": {"lr": 0.003}} \ No newline at end of file + default_config = {"optimizer": {"lr": 1e-3}} From f4202684c711c976659454662feae65affe05d70 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 22 Nov 2023 13:26:33 -0500 Subject: [PATCH 035/150] Add missing 'in_features = out_features' in AVA.__init__ --- src/vak/nets/ava.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 98e6bddfb..d4b6bb7c4 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -95,6 +95,7 @@ def __init__( nn.ReLU() ) ) + in_features = out_features self.decoder_fc = nn.Sequential(*modules) # ---- build decoder From 6b0f57659f3ee70b11bae4e14efb18fa148949cf Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 22 Nov 2023 13:26:56 -0500 Subject: [PATCH 036/150] Fix AVA.decode to use self.x_shape --- src/vak/nets/ava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index d4b6bb7c4..77a449e58 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -132,7 +132,7 @@ def decode(self, z): """ """ z = 
self.decoder_fc(z).view(-1, *self.fc_view) - z = self.decoder(z).view(-1, *self.x_dim) + z = self.decoder(z).view(-1, *self.x_shape) return z @staticmethod From 88b001e17ae511cf658cce504b5dcfb7083fd9f6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 22 Nov 2023 13:42:39 -0500 Subject: [PATCH 037/150] Use input_shape in AVA.decode --- src/vak/nets/ava.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 77a449e58..e3c5c0994 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -132,7 +132,7 @@ def decode(self, z): """ """ z = self.decoder_fc(z).view(-1, *self.fc_view) - z = self.decoder(z).view(-1, *self.x_shape) + z = self.decoder(z).view(-1, *self.input_shape) return z @staticmethod From c4fdd3a7820e3cc3ec69fa09b8fd11e1158c2af1 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 20:57:09 -0500 Subject: [PATCH 038/150] WIP: Add tests/test_nets/test_ava.py --- tests/test_nets/test_ava.py | 57 +++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 tests/test_nets/test_ava.py diff --git a/tests/test_nets/test_ava.py b/tests/test_nets/test_ava.py new file mode 100644 index 000000000..4f6a91bf1 --- /dev/null +++ b/tests/test_nets/test_ava.py @@ -0,0 +1,57 @@ +import torch +import pytest + +import vak.nets + + +class TestAVA: + + @pytest.mark.parametrize( + 'input_shape', + [ + ( + 1, 128, 128, + ), + ( + 1, 256, 256, + ), + ] + ) + def test_init(self, input_shape): + """test we can instantiate AVA + and it has the expected attributes""" + net = vak.nets.AVA(input_shape) + assert isinstance(net, vak.nets.AVA) + for expected_attr, expected_type in ( + ('input_shape', tuple), + ('in_channels', int), + ('x_shape', tuple), + ('x_dim', int), + ('encoder', torch.nn.Module), + ('shared_encoder_fc', torch.nn.Module), + ('mu_fc', torch.nn.Module), + ('cov_factor_fc', torch.nn.Module), + ('cov_diag_fc', torch.nn.Module), + ('decoder_fc', 
torch.nn.Module), + ('decoder', torch.nn.Module), + ): + assert hasattr(net, expected_attr) + assert isinstance(getattr(net, expected_attr), expected_type) + + assert net.input_shape == input_shape + + @pytest.mark.parametrize( + 'input_shape, batch_size', + [ + ((1, 128, 128,), 32), + ((1, 256, 256,), 64), + ] + ) + def test_forward(self, input_shape, batch_size): + """test we can forward a tensor through a ConvEncoder instance + and get the expected output""" + + input = torch.rand(batch_size, *input_shape) # a "batch" + net = vak.nets.AVA(input_shape) + out = net(input) + assert isinstance(out, torch.Tensor) From d28ab0b8b9c2b26e2f9e4ce8e9abc7a7303c800c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 20:57:25 -0500 Subject: [PATCH 039/150] WIP: Add docstrings in src/vak/nets/ava.py --- src/vak/nets/ava.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index e3c5c0994..d487706d3 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -22,6 +22,7 @@ def forward(self, x): class AVA(nn.Module): """ + """ def __init__( self, @@ -31,6 +32,13 @@ def __init__( z_dim: int = 32, ): """ + + Parameters + ---------- + input_shape + encoder_channels + fc_dims + z_dim """ super().__init__() From 911013cfae21905f4407a7eac968407b88ea2281 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 20:57:40 -0500 Subject: [PATCH 040/150] Remove unused import, extra line break at end of tests/test_nets/test_convencoder.py --- tests/test_nets/test_convencoder.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/tests/test_nets/test_convencoder.py b/tests/test_nets/test_convencoder.py index eaa3b6f6d..f89646139 100644 --- a/tests/test_nets/test_convencoder.py +++ b/tests/test_nets/test_convencoder.py @@ -1,5 +1,3 @@ -import inspect - import torch import pytest @@ -50,4 +48,3 @@ def test_forward(self, input_shape, batch_size): net = vak.nets.ConvEncoder(input_shape) out = net(input) assert 
isinstance(out, torch.Tensor) - From 0cb4933894226d07fc82c875740a15d0a15f3943 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 21:41:37 -0500 Subject: [PATCH 041/150] Cast AVA attributes in_channels and x_dim to be ints --- src/vak/nets/ava.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index d487706d3..88565eeea 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -43,9 +43,9 @@ def __init__( super().__init__() self.input_shape = input_shape - self.in_channels = input_shape[0] + self.in_channels = int(input_shape[0]) self.x_shape = input_shape[1:] # channels * hide * width - self.x_dim = np.prod(self.x_shape) + self.x_dim = int(np.prod(self.x_shape)) # ---- build encoder modules = [] From 94fe92bbaa70698009426aed1d8d96df22f814ee Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 21:41:50 -0500 Subject: [PATCH 042/150] Add newline at end of src/vak/models/vae_model.py --- src/vak/models/vae_model.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index 72b320299..7fe872ccc 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -1,18 +1,16 @@ from __future__ import annotations -import pathlib from typing import Callable, ClassVar, Type -import pytorch_lightning as lightning import torch import torch.utils.data -from torch import nn from operator import itemgetter from .registry import model_family from . 
import base from .definition import ModelDefinition + @model_family class VAEModel(base.Model): definition: ClassVar[ModelDefinition] @@ -89,4 +87,4 @@ def from_config( optimizer=optimizer, loss=loss, metrics=metrics, - ) \ No newline at end of file + ) From e0fa0872281953adf218c8081574539216d8aadb Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 27 Dec 2023 21:41:56 -0500 Subject: [PATCH 043/150] Fix tests in tests/test_nets/test_ava.py --- tests/test_nets/test_ava.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/tests/test_nets/test_ava.py b/tests/test_nets/test_ava.py index 4f6a91bf1..fcf10bc41 100644 --- a/tests/test_nets/test_ava.py +++ b/tests/test_nets/test_ava.py @@ -54,4 +54,8 @@ def test_forward(self, input_shape, batch_size): input = torch.rand(batch_size, *input_shape) # a "batch" net = vak.nets.AVA(input_shape) out = net(input) - assert isinstance(out, torch.Tensor) + assert len(out) == 3 + x_rec, z, latent_dist = out + for tensor in (x_rec, z): + assert isinstance(tensor, torch.Tensor) + assert isinstance(latent_dist, torch.distributions.LowRankMultivariateNormal) From 9bdf7bbaa09f36b78eee497d7c117c7c98666b46 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:03:25 -0500 Subject: [PATCH 044/150] Add src/vak/transforms/defaults/vae.py --- src/vak/transforms/defaults/vae.py | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) create mode 100644 src/vak/transforms/defaults/vae.py diff --git a/src/vak/transforms/defaults/vae.py b/src/vak/transforms/defaults/vae.py new file mode 100644 index 000000000..53126c33f --- /dev/null +++ b/src/vak/transforms/defaults/vae.py @@ -0,0 +1,26 @@ +"""Default transforms for VAE models.""" +from __future__ import annotations + +import torchvision.transforms + +from .. import transforms as vak_transforms + + +def get_default_vae_transform( + transform_kwargs, +) -> torchvision.transforms.Compose: + """Get default transform for VAE model. 
+ + Parameters + ---------- + transform_kwargs : dict + + Returns + ------- + transform : Callable + """ + transforms = [ + vak_transforms.ToFloatTensor(), + vak_transforms.AddChannel(), + ] + return torchvision.transforms.Compose(transforms) From 6d853361b7789a298f282b2ae690c2d4a3176de6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:03:35 -0500 Subject: [PATCH 045/150] Fix typo in docstring in src/vak/transforms/defaults/parametric_umap.py --- src/vak/transforms/defaults/parametric_umap.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/transforms/defaults/parametric_umap.py b/src/vak/transforms/defaults/parametric_umap.py index 83c568b06..be4b51864 100644 --- a/src/vak/transforms/defaults/parametric_umap.py +++ b/src/vak/transforms/defaults/parametric_umap.py @@ -9,7 +9,7 @@ def get_default_parametric_umap_transform( transform_kwargs, ) -> torchvision.transforms.Compose: - """Get default transform for frame classification model. + """Get default transform for Parametric UMAP model. Parameters ---------- From 81ce1048e3934f1d10515b18490d7354a0618a07 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:03:49 -0500 Subject: [PATCH 046/150] Modify src/vak/transforms/defaults/get.py to handle VAEModel --- src/vak/transforms/defaults/get.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/src/vak/transforms/defaults/get.py b/src/vak/transforms/defaults/get.py index 0851d515c..e7a6e2abd 100644 --- a/src/vak/transforms/defaults/get.py +++ b/src/vak/transforms/defaults/get.py @@ -2,7 +2,7 @@ from __future__ import annotations from ... import models -from . import frame_classification, parametric_umap +from . 
import frame_classification, parametric_umap, vae def get_default_transform( @@ -44,3 +44,8 @@ def get_default_transform( return parametric_umap.get_default_parametric_umap_transform( transform_kwargs ) + + elif model_family == "VAEModel": + return vae.get_default_vae_transform( + transform_kwargs + ) From 7ba9481772bb15a985d43088216a5f44f2d0adbb Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:15:06 -0500 Subject: [PATCH 047/150] Clean up docstrings in src/vak/datasets/parametric_umap/metadata.py --- src/vak/datasets/parametric_umap/metadata.py | 20 +++++++++++++------- 1 file changed, 13 insertions(+), 7 deletions(-) diff --git a/src/vak/datasets/parametric_umap/metadata.py b/src/vak/datasets/parametric_umap/metadata.py index ac0b8a137..783c9bdfc 100644 --- a/src/vak/datasets/parametric_umap/metadata.py +++ b/src/vak/datasets/parametric_umap/metadata.py @@ -1,7 +1,8 @@ """A dataclass that represents metadata -associated with a dimensionality reduction dataset, -as generated by -:func:`vak.core.prep.frame_classification.prep_dimensionality_reduction_dataset`""" +associated with a parametric UMAP dataset. + +The metadata is generated by +:func:`vak.core.prep.parametric_umap.prep_parametric_umap_dataset`.""" from __future__ import annotations import json @@ -45,8 +46,10 @@ def is_valid_spect_format(instance, attribute, value): @attr.define class Metadata: """A dataclass that represents metadata - associated with a dataset that was - generated by :func:`vak.core.prep.prep`. + associated with a parametric UMAP dataset. + + The metadata is generated by + :func:`vak.core.prep.parametric_umap.prep_parametric_umap_dataset`. Attributes ---------- @@ -54,7 +57,10 @@ class Metadata: Name of csv file representing the source files in the dataset. Csv file will be located in root of directory representing dataset, so only the filename is given. - audio_format + shape : tuple + Of ints, the shape of the samples. 
+ audio_format : str + The format of the source audio files used to generate the dataset. """ # declare this as a constant to avoid @@ -130,7 +136,7 @@ def to_json(self, dataset_path: str | pathlib.Path) -> None: This method is called by :func:`vak.core.prep.prep` after it generates a dataset and then creates an - instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + instance of :class:`~vak.datasets.parametric_umap.Metadata` with metadata about that dataset. Parameters From 75c72a0c86d93c4b86993689efd918ab1886043e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:48:20 -0500 Subject: [PATCH 048/150] Revise docstrings in src/vak/prep/parametric_umap/parametric_umap.py --- src/vak/prep/parametric_umap/parametric_umap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 560b5a699..b544c8458 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -1,3 +1,4 @@ +"""Prepare datasets for parametric UMAP models.""" from __future__ import annotations import json @@ -37,8 +38,7 @@ def prep_parametric_umap_dataset( spect_key: str = "s", timebins_key: str = "t", ): - """Prepare datasets for neural network models - that perform a dimensionality reduction task. + """Prepare datasets for parametric UMAP models. For general information on dataset preparation, see the docstring for :func:`vak.prep.prep`. 
From 8339c9b0788bbff0dd9eff92407caed45768ad90 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:48:36 -0500 Subject: [PATCH 049/150] WIP: Add src/vak/datasets/vae/ --- src/vak/datasets/vae/__init__.py | 1 + src/vak/datasets/vae/metadata.py | 153 ++++++++++ src/vak/datasets/vae/segment_dataset.py | 77 ++++++ src/vak/datasets/vae/window_dataset.py | 354 ++++++++++++++++++++++++ 4 files changed, 585 insertions(+) create mode 100644 src/vak/datasets/vae/__init__.py create mode 100644 src/vak/datasets/vae/metadata.py create mode 100644 src/vak/datasets/vae/segment_dataset.py create mode 100644 src/vak/datasets/vae/window_dataset.py diff --git a/src/vak/datasets/vae/__init__.py b/src/vak/datasets/vae/__init__.py new file mode 100644 index 000000000..133b3cd80 --- /dev/null +++ b/src/vak/datasets/vae/__init__.py @@ -0,0 +1 @@ +from .segment_dataset import SegmentDataset diff --git a/src/vak/datasets/vae/metadata.py b/src/vak/datasets/vae/metadata.py new file mode 100644 index 000000000..c193b82f9 --- /dev/null +++ b/src/vak/datasets/vae/metadata.py @@ -0,0 +1,153 @@ +"""A dataclass that represents metadata +associated with a VAE dataset, +as generated by +:func:`vak.core.prep.frame_classification.prep_dimensionality_reduction_dataset`""" +from __future__ import annotations + +import json +import pathlib +from typing import ClassVar + +import attr + + +def is_valid_dataset_csv_filename(instance, attribute, value): + valid = "_prep_" in value and value.endswith(".csv") + if not valid: + raise ValueError( + f"Invalid dataset csv filename: {value}." + f'Filename should contain the string "_prep_" ' + f"and end with the extension .csv." + f"Valid filenames are generated by " + f"vak.core.prep.generate_dataset_csv_filename" + ) + + +def is_valid_audio_format(instance, attribute, value): + import vak.common.constants + + if value not in vak.common.constants.VALID_AUDIO_FORMATS: + raise ValueError( + f"Not a valid audio format: {value}. 
Valid audio formats are: {vak.common.constants.VALID_AUDIO_FORMATS}" + ) + + +def is_valid_spect_format(instance, attribute, value): + import vak.common.constants + + if value not in vak.common.constants.VALID_SPECT_FORMATS: + raise ValueError( + f"Not a valid spectrogram format: {value}. " + f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}" + ) + + +@attr.define +class Metadata: + """A dataclass that represents metadata + associated with a dataset that was + generated by :func:`vak.core.prep.prep`. + + Attributes + ---------- + dataset_csv_filename : str + Name of csv file representing the source files in the dataset. + Csv file will be located in root of directory representing dataset, + so only the filename is given. + audio_format + """ + + # declare this as a constant to avoid + # needing to remember this in multiple places, and to use in unit tests + METADATA_JSON_FILENAME: ClassVar = "metadata.json" + + dataset_csv_filename: str = attr.field( + converter=str, validator=is_valid_dataset_csv_filename + ) + + shape: tuple = attr.field(converter=tuple) + + @shape.validator + def is_valid_shape(self, attribute, value): + if not isinstance(value, tuple): + raise TypeError( + f"`shape` should be a tuple but type was: {type(value)}" + ) + if not all([isinstance(val, int) and val > 0 for val in value]): + raise ValueError( + f"All values of `shape` should be positive integers but values were: {value}" + ) + + audio_format: str = attr.field( + converter=attr.converters.optional(str), + validator=attr.validators.optional(is_valid_audio_format), + default=None, + ) + + @classmethod + def from_path(cls, json_path: str | pathlib.Path): + """Load dataset metadata from a json file. + + Class method that returns an instance of + :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata`. 
+ + Parameters + ---------- + json_path : string, pathlib.Path + Path to a 'metadata.json' file created by + :func:`vak.core.prep.prep` when generating + a dataset. + + Returns + ------- + metadata : vak.datasets.frame_classification.FrameClassificationDatatsetMetadata + Instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + with metadata loaded from json file. + """ + json_path = pathlib.Path(json_path) + with json_path.open("r") as fp: + metadata_json = json.load(fp) + return cls(**metadata_json) + + @classmethod + def from_dataset_path(cls, dataset_path: str | pathlib.Path): + dataset_path = pathlib.Path(dataset_path) + if not dataset_path.exists() or not dataset_path.is_dir(): + raise NotADirectoryError( + f"`dataset_path` not found or not recognized as a directory: {dataset_path}" + ) + + metadata_json_path = dataset_path / cls.METADATA_JSON_FILENAME + if not metadata_json_path.exists(): + raise FileNotFoundError( + f"Metadata file not found: {metadata_json_path}" + ) + + return cls.from_path(metadata_json_path) + + def to_json(self, dataset_path: str | pathlib.Path) -> None: + """Dump dataset metadata to a json file. + + This method is called by :func:`vak.core.prep.prep` + after it generates a dataset and then creates an + instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + with metadata about that dataset. + + Parameters + ---------- + dataset_path : string, pathlib.Path + Path to root of a directory representing a dataset + generated by :func:`vak.core.prep.prep`. + where 'metadata.json' file + should be saved. 
class SegmentDataset(torch.utils.data.Dataset):
    """Pipeline for loading samples from a dataset of spectrograms.

    Each sample is one pre-computed spectrogram of a single segment.
    This is a simplified version of
    :class:`vak.datasets.parametric_umap.ParametricUmapInferenceDataset`.
    """

    def __init__(
        self,
        data: npt.NDArray,
        dataset_df: pd.DataFrame,
        transform: Callable | None = None,
    ):
        """Initialize a :class:`SegmentDataset`.

        Parameters
        ----------
        data : numpy.ndarray
            Stacked spectrograms; first axis indexes samples.
        dataset_df : pandas.DataFrame
            Rows of the dataset csv for this split.
        transform : callable, optional
            Applied to each sample before it is returned.
        """
        self.data = data
        self.dataset_df = dataset_df
        self.transform = transform

    def __len__(self):
        # one sample per entry along the first axis of ``data``
        return self.data.shape[0]

    def __getitem__(self, index):
        sample = self.data[index]
        # index into the original dataframe, returned so callers can
        # map a sample back to its source row
        row_index = self.dataset_df.index[index]
        if self.transform:
            sample = self.transform(sample)
        return {"x": sample, "df_index": row_index}

    @property
    def duration(self):
        """Total duration in seconds of all segments in this split."""
        return self.dataset_df["duration"].sum()

    @property
    def shape(self):
        """Shape of one (transformed) sample; used e.g. to size networks."""
        first_item = self.__getitem__(0)
        return first_item["x"].shape

    @classmethod
    def from_dataset_path(
        cls,
        dataset_path: str | pathlib.Path,
        split: str,
        transform: Callable | None = None,
    ):
        """Build a :class:`SegmentDataset` from a prepared dataset directory.

        Loads the dataset csv named in the dataset's metadata, keeps the
        rows belonging to ``split``, and stacks their spectrogram arrays.
        """
        import vak.datasets  # import here just to make classmethod more explicit

        dataset_path = pathlib.Path(dataset_path)
        # NOTE(review): re-uses the parametric UMAP metadata format for
        # VAE datasets -- confirm this should not be a vae-specific Metadata
        metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path(
            dataset_path
        )

        dataset_df = pd.read_csv(dataset_path / metadata.dataset_csv_filename)
        split_df = dataset_df[dataset_df.split == split]

        # assumes all spectrograms in the split share one shape -- np.stack
        # raises otherwise; TODO confirm against prep
        spect_arrays = [
            np.load(dataset_path / spect_path)
            for spect_path in split_df.spect_path.values
        ]
        return cls(np.stack(spect_arrays), split_df, transform=transform)
class WindowDataset:
    """Dataset class used for training VAE models on fixed-size windows,
    such as a "shotgun VAE" [1]_.

    Windows are drawn from "frames" -- audio samples or spectrogram
    time bins -- concatenated across an entire dataset split.
    Unlike the frame classification version of this class, samples
    are windows of frames *only*: a VAE trains without frame labels.

    Attributes
    ----------
    dataset_path : pathlib.Path
        Path to directory that represents a VAE dataset,
        as created by :func:`vak.prep.prep_vae_dataset`.
    split : str
        The name of a split from the dataset,
        one of {'train', 'val', 'test'}.
    subset : str, optional
        Name of subset to use. If specified, this takes precedence
        over ``split``. Subsets are typically taken from the training
        data for use when generating a learning curve.
    dataset_df : pandas.DataFrame
        A VAE dataset, represented as a :class:`pandas.DataFrame`;
        only the rows that correspond to ``subset`` or ``split``.
    input_type : str
        The type of input to the neural network model.
        One of {'audio', 'spect'}.
    frames_paths : numpy.ndarray
        Paths to files containing frames, either spectrograms or
        audio signals, that are input to the model.
    sample_ids : numpy.ndarray
        Indexing vector representing which sample from the dataset
        every frame belongs to.
    inds_in_sample : numpy.ndarray
        Indexing vector representing which index within each sample
        from the dataset that every frame belongs to.
    window_size : int
        Size of windows to return; number of frames.
    frame_dur : float
        Duration of a frame, i.e., a single sample in audio
        or a single timebin in a spectrogram.
    stride : int
        The size of the stride used to determine which windows
        are included in the dataset. The default is 1.
        Used to compute ``window_inds`` with
        :func:`vak.datasets.frame_classification.window_dataset.get_window_inds`.
    window_inds : numpy.ndarray, optional
        A vector of valid window indices for the dataset.
        If specified, this takes precedence over ``stride``.
    transform : callable
        The transform applied to the frames,
        the input to the neural network :math:`x`.
    target_transform : callable, optional
        Accepted for interface compatibility with the frame
        classification dataset classes; a VAE has no target
        :math:`y`, so this is stored but unused.
    """

    def __init__(
        self,
        dataset_path: str | pathlib.Path,
        dataset_df: pd.DataFrame,
        input_type: str,
        split: str,
        sample_ids: npt.NDArray,
        inds_in_sample: npt.NDArray,
        window_size: int,
        frame_dur: float,
        stride: int = 1,
        subset: str | None = None,
        window_inds: npt.NDArray | None = None,
        transform: Callable | None = None,
        target_transform: Callable | None = None,
    ):
        """Initialize a new instance of a :class:`WindowDataset`.

        All parameters are documented in the class docstring.
        """
        # local imports avoid a circular import; ``prep`` supplies
        # constants.INPUT_TYPES, and ``get_window_inds`` lives in the
        # frame classification dataset module
        from ... import prep
        from ..frame_classification.window_dataset import (  # NOTE(review): confirm import path
            get_window_inds,
        )

        if input_type not in prep.constants.INPUT_TYPES:
            raise ValueError(
                f"``input_type`` must be one of: {prep.constants.INPUT_TYPES}\n"
                f"Value for ``input_type`` was: {input_type}"
            )

        self.dataset_path = pathlib.Path(dataset_path)
        self.split = split
        self.subset = subset
        # subset takes precedence over split, if specified
        if subset:
            dataset_df = dataset_df[dataset_df.subset == subset].copy()
        else:
            dataset_df = dataset_df[dataset_df.split == split].copy()
        self.dataset_df = dataset_df
        self.input_type = input_type
        self.frames_paths = self.dataset_df[
            constants.FRAMES_PATH_COL_NAME
        ].values
        self.sample_ids = sample_ids
        self.inds_in_sample = inds_in_sample
        self.window_size = window_size
        self.frame_dur = float(frame_dur)
        self.stride = stride
        if window_inds is None:
            window_inds = get_window_inds(
                sample_ids.shape[-1], window_size, stride
            )
        self.window_inds = window_inds
        self.transform = transform
        # FIX: ``target_transform`` was assigned here without being a
        # parameter, raising NameError on every construction; it is now
        # accepted (and ignored) so ``from_dataset_path`` can pass it.
        self.target_transform = target_transform

    @property
    def duration(self):
        """Total duration of this split, in seconds."""
        return self.sample_ids.shape[-1] * self.frame_dur

    @property
    def shape(self):
        """Shape of one windowed sample.

        Used by vak functions that need to determine the size of a
        window, e.g. when initializing a neural network model.
        """
        # FIX: __getitem__ now returns frames only (no frame labels),
        # so no tuple unpacking here
        one_x = self.__getitem__(0)
        return one_x.shape

    def _load_frames(self, frames_path):
        """Load "frames", the input to the VAE.

        Loads audio or spectrogram depending on :attr:`self.input_type`;
        assumes audio is in wav format and spectrograms are in npz files.
        """
        return helper.load_frames(frames_path, self.input_type)

    def __getitem__(self, idx):
        window_idx = self.window_inds[idx]
        sample_ids = self.sample_ids[
            window_idx : window_idx + self.window_size  # noqa: E203
        ]
        uniq_sample_ids = np.unique(sample_ids)
        if len(uniq_sample_ids) == 1:
            # fast path: window lies entirely within a single sample
            sample_id = uniq_sample_ids[0]
            frames = self._load_frames(
                self.dataset_path / self.frames_paths[sample_id]
            )
        elif len(uniq_sample_ids) > 1:
            # window spans a sample boundary: concatenate all samples
            # it touches, then slice the window out below.
            # FIX: dropped loading of frame labels -- the original read
            # ``self.frame_labels_paths``, an attribute that was never
            # set (copy-paste from the frame classification dataset)
            # and raised AttributeError; a VAE needs no frame labels.
            frames = [
                self._load_frames(
                    self.dataset_path / self.frames_paths[sample_id]
                )
                for sample_id in sorted(uniq_sample_ids)
            ]
            if all([frames_.ndim == 1 for frames_ in frames]):
                # all 1-d audio vectors; ``axis=1`` would raise here
                frames = np.concatenate(frames)
            else:
                frames = np.concatenate(frames, axis=1)
        else:
            raise ValueError(
                f"Unexpected number of ``uniq_sample_ids``: {uniq_sample_ids}"
            )

        ind_in_sample = self.inds_in_sample[window_idx]
        frames = frames[
            ...,
            ind_in_sample : ind_in_sample + self.window_size,  # noqa: E203
        ]
        if self.transform:
            frames = self.transform(frames)
        return frames

    def __len__(self):
        """Number of windows in the dataset."""
        return len(self.window_inds)

    @classmethod
    def from_dataset_path(
        cls,
        dataset_path: str | pathlib.Path,
        window_size: int,
        stride: int = 1,
        split: str = "train",
        subset: str | None = None,
        transform: Callable | None = None,
        target_transform: Callable | None = None,
    ):
        """Make a :class:`WindowDataset` instance,
        given the path to a VAE dataset.

        Parameters
        ----------
        dataset_path : pathlib.Path
            Path to directory that represents a VAE dataset.
        window_size : int
            Size of windows to return; number of frames.
        stride : int
            Stride used to determine which windows are included.
            The default is 1.
        split : str
            The name of a split from the dataset,
            one of {'train', 'val', 'test'}.
        subset : str, optional
            Name of subset to use; takes precedence over ``split``.
        transform : callable
            The transform applied to the input to the network :math:`x`.
        target_transform : callable, optional
            Unused by VAE training; see class docstring.

        Returns
        -------
        dataset : WindowDataset
        """
        dataset_path = pathlib.Path(dataset_path)
        metadata = Metadata.from_dataset_path(dataset_path)
        frame_dur = metadata.frame_dur
        input_type = metadata.input_type

        dataset_csv_path = dataset_path / metadata.dataset_csv_filename
        dataset_df = pd.read_csv(dataset_csv_path)

        split_path = dataset_path / split
        if subset:
            sample_ids_path = (
                split_path
                / helper.sample_ids_array_filename_for_subset(subset)
            )
        else:
            sample_ids_path = split_path / constants.SAMPLE_IDS_ARRAY_FILENAME
        sample_ids = np.load(sample_ids_path)

        if subset:
            inds_in_sample_path = (
                split_path
                / helper.inds_in_sample_array_filename_for_subset(subset)
            )
        else:
            inds_in_sample_path = (
                split_path / constants.INDS_IN_SAMPLE_ARRAY_FILENAME
            )
        inds_in_sample = np.load(inds_in_sample_path)

        window_inds_path = split_path / constants.WINDOW_INDS_ARRAY_FILENAME
        if window_inds_path.exists():
            window_inds = np.load(window_inds_path)
        else:
            window_inds = None

        # FIX: this call previously raised TypeError because __init__
        # had no ``target_transform`` parameter; it does now.
        return cls(
            dataset_path,
            dataset_df,
            input_type,
            split,
            sample_ids,
            inds_in_sample,
            window_size,
            frame_dur,
            stride,
            subset,
            window_inds,
            transform,
            target_transform,
        )
src/vak/train/vae.py diff --git a/src/vak/train/vae.py b/src/vak/train/vae.py new file mode 100644 index 000000000..375a138c4 --- /dev/null +++ b/src/vak/train/vae.py @@ -0,0 +1,256 @@ +"""Function that trains models in the Variational Autoencoder family.""" +from __future__ import annotations + +import datetime +import logging +import pathlib + +import numpy as np +import pandas as pd +import pytorch_lightning as lightning +import torch.utils.data + +from .. import datasets, models, transforms +from ..common import validators +from ..common.device import get_default as get_default_device +from ..common.trainer import get_default_trainer +from ..datasets.vae import SegmentDataset +from .frame_classification import get_split_dur + + +logger = logging.getLogger(__name__) + + +def get_trainer( + max_epochs: int, + ckpt_root: str | pathlib.Path, + ckpt_step: int, + log_save_dir: str | pathlib.Path, + device: str = "cuda", +) -> lightning.Trainer: + """Returns an instance of ``lightning.Trainer`` + with a default set of callbacks. 
+ Used by ``vak.core`` functions.""" + # TODO: use accelerator parameter, https://github.com/vocalpy/vak/issues/691 + if device == "cuda": + accelerator = "gpu" + else: + accelerator = "auto" + + ckpt_callback = lightning.callbacks.ModelCheckpoint( + dirpath=ckpt_root, + filename="checkpoint", + every_n_train_steps=ckpt_step, + save_last=True, + verbose=True, + ) + ckpt_callback.CHECKPOINT_NAME_LAST = "checkpoint" + ckpt_callback.FILE_EXTENSION = ".pt" + + val_ckpt_callback = lightning.callbacks.ModelCheckpoint( + monitor="val_loss", + dirpath=ckpt_root, + save_top_k=1, + mode="min", + filename="min-val-loss-checkpoint", + auto_insert_metric_name=False, + verbose=True, + ) + val_ckpt_callback.FILE_EXTENSION = ".pt" + + callbacks = [ + ckpt_callback, + val_ckpt_callback, + ] + + logger = lightning.loggers.TensorBoardLogger(save_dir=log_save_dir) + + trainer = lightning.Trainer( + max_epochs=max_epochs, + accelerator=accelerator, + logger=logger, + callbacks=callbacks, + ) + return trainer + + +def train_vae_model( + model_name: str, + model_config: dict, + dataset_path: str | pathlib.Path, + batch_size: int, + num_epochs: int, + num_workers: int, + train_transform_params: dict | None = None, + train_dataset_params: dict | None = None, + val_transform_params: dict | None = None, + val_dataset_params: dict | None = None, + checkpoint_path: str | pathlib.Path | None = None, + spect_scaler_path: str | pathlib.Path | None = None, + results_path: str | pathlib.Path | None = None, + normalize_spectrograms: bool = True, + shuffle: bool = True, + val_step: int | None = None, + ckpt_step: int | None = None, + patience: int | None = None, + device: str | None = None, + subset: str | None = None, +) -> None: + """Train a model from the Variational Autoencoder family + and save results. + + Parameters + ---------- + model_name : str + Model name, must be one of vak.models.registry.MODEL_NAMES. 
+ model_config : dict + Model configuration in a ``dict``, + as loaded from a .toml file, + and used by the model method ``from_config``. + dataset_path : str + Path to dataset, a directory generated by running ``vak prep``. + batch_size : int + number of samples per batch presented to models during training. + num_epochs : int + number of training epochs. One epoch = one iteration through the entire + training set. + num_workers : int + Number of processes to use for parallel loading of data. + Argument to torch.DataLoader. + train_transform_params + train_dataset_params + val_transform_params + val_dataset_params + checkpoint_path + spect_scaler_path + results_path + normalize_spectrograms + shuffle + val_step + ckpt_step + patience + device + subset + + Returns + ------- + + """ + for path, path_name in zip( + (checkpoint_path, spect_scaler_path), + ("checkpoint_path", "spect_scaler_path"), + ): + if path is not None: + if not validators.is_a_file(path): + raise FileNotFoundError( + f"value for ``{path_name}`` not recognized as a file: {path}" + ) + + dataset_path = pathlib.Path(dataset_path) + if not dataset_path.exists() or not dataset_path.is_dir(): + raise NotADirectoryError( + f"`dataset_path` not found or not recognized as a directory: {dataset_path}" + ) + + logger.info( + f"Loading dataset from path: {dataset_path}", + ) + metadata = datasets.parametric_umap.Metadata.from_dataset_path( + dataset_path + ) + dataset_csv_path = dataset_path / metadata.dataset_csv_filename + dataset_df = pd.read_csv(dataset_csv_path) + # ---------------- pre-conditions ---------------------------------------------------------------------------------- + if val_step and not dataset_df["split"].str.contains("val").any(): + raise ValueError( + f"val_step set to {val_step} but dataset does not contain a validation set; " + f"please run `vak prep` with a config.toml file that specifies a duration for the validation set." 
+ ) + + # ---- set up directory to save output ----------------------------------------------------------------------------- + results_path = pathlib.Path(results_path).expanduser().resolve() + if not results_path.is_dir(): + raise NotADirectoryError( + f"results_path not recognized as a directory: {results_path}" + ) + + # ---------------- load training data ----------------------------------------------------------------------------- + logger.info(f"Using training split from dataset: {dataset_path}") + # below, if we're going to train network to predict unlabeled segments, then + # we need to include a class for those unlabeled segments in labelmap, + # the mapping from labelset provided by user to a set of consecutive + # integers that the network learns to predict + train_dur = get_split_dur(dataset_df, "train") + print( + f"Total duration of training split from dataset (in s): {train_dur}", + ) + + train_transform_params = {} + transform = transforms.defaults.get_default_transform( + "ConvEncoderUMAP", "train", train_transform_params + ) + + if train_transform_params is None: + train_transform_params = {} + train_dataset = SegmentDataset.from_dataset_path( + dataset_path=dataset_path, + split="train", + transform=transform, + ) + + train_loader = torch.utils.data.DataLoader( + dataset=train_dataset, + shuffle=True, + batch_size=64, + num_workers=16, + ) + + # ---------------- load validation set (if there is one) ----------------------------------------------------------- + + val_transform_params = {} + transform = vak.transforms.defaults.get_default_transform( + "ConvEncoderUMAP", "eval", val_transform_params + ) + val_dataset_params = {} + val_dataset = SpectrogramPipe.from_dataset_path( + dataset_path=dataset_path, + split="val", + transform=transform, + **val_dataset_params, + ) + print( + f"Duration of ParametricUMAPDataset used for validation, in seconds: {val_dataset.duration}", + ) + val_loader = torch.utils.data.DataLoader( + dataset=val_dataset, + 
shuffle=False, + batch_size=64, + num_workers=16, + ) + + device = vak.common.device.get_default() + + model = vak.models.get( + "AVA", + config={"network": {}, "optimizer": {"lr": 0.001}}, + input_shape=train_dataset.shape, + ) + + results_model_root = results_path.joinpath("AVA") + results_model_root.mkdir(exist_ok=True) + ckpt_root = results_model_root.joinpath("checkpoints") + ckpt_root.mkdir(exist_ok=True) + + trainer = get_trainer( + max_epochs=50, + log_save_dir=results_model_root, + device=device, + ckpt_root=ckpt_root, + ckpt_step=250, + ) + + trainer.fit( + model=model, + train_dataloaders=train_loader, + val_dataloaders=val_loader, + ) From 78c7f74a68dfdb2123f71f56b21eb029227f472a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:55:46 -0500 Subject: [PATCH 052/150] Add src/vak/datasets/validators.py with validators for metadata --- src/vak/datasets/validators.py | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) create mode 100644 src/vak/datasets/validators.py diff --git a/src/vak/datasets/validators.py b/src/vak/datasets/validators.py new file mode 100644 index 000000000..f37be171d --- /dev/null +++ b/src/vak/datasets/validators.py @@ -0,0 +1,30 @@ +"""Validators used with metadata""" +def is_valid_dataset_csv_filename(instance, attribute, value): + valid = "_prep_" in value and value.endswith(".csv") + if not valid: + raise ValueError( + f"Invalid dataset csv filename: {value}." + f'Filename should contain the string "_prep_" ' + f"and end with the extension .csv." + f"Valid filenames are generated by " + f"vak.core.prep.generate_dataset_csv_filename" + ) + + +def is_valid_audio_format(instance, attribute, value): + import vak.common.constants + + if value not in vak.common.constants.VALID_AUDIO_FORMATS: + raise ValueError( + f"Not a valid audio format: {value}. 
"""Validators used with metadata.

These are attrs-style validators: each takes ``(instance, attribute,
value)`` and raises on an invalid ``value``.
"""


def is_valid_dataset_csv_filename(instance, attribute, value):
    """Require a dataset csv filename as generated by vak.

    Valid filenames contain the string "_prep_" and end with ".csv".
    """
    valid = "_prep_" in value and value.endswith(".csv")
    if not valid:
        # FIX: the original f-strings concatenated without separating
        # spaces, producing "….Filename" and "….csvValid" in the message
        raise ValueError(
            f"Invalid dataset csv filename: {value}. "
            f'Filename should contain the string "_prep_" '
            f"and end with the extension .csv. "
            f"Valid filenames are generated by "
            f"vak.core.prep.generate_dataset_csv_filename"
        )


def is_valid_audio_format(instance, attribute, value):
    """Require an audio format that vak recognizes."""
    import vak.common.constants

    if value not in vak.common.constants.VALID_AUDIO_FORMATS:
        raise ValueError(
            f"Not a valid audio format: {value}. "
            f"Valid audio formats are: {vak.common.constants.VALID_AUDIO_FORMATS}"
        )


def is_valid_spect_format(instance, attribute, value):
    """Require a spectrogram format that vak recognizes."""
    import vak.common.constants

    if value not in vak.common.constants.VALID_SPECT_FORMATS:
        raise ValueError(
            f"Not a valid spectrogram format: {value}. "
            f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}"
        )
" - f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}" - ) +from .. import validators @attr.define @@ -67,7 +38,7 @@ class Metadata: METADATA_JSON_FILENAME: ClassVar = "metadata.json" dataset_csv_filename: str = attr.field( - converter=str, validator=is_valid_dataset_csv_filename + converter=str, validator=validators.is_valid_dataset_csv_filename ) input_type: str = attr.field() @@ -97,13 +68,13 @@ def is_valid_frame_dur(self, attribute, value): audio_format: str = attr.field( converter=attr.converters.optional(str), - validator=attr.validators.optional(is_valid_audio_format), + validator=attr.validators.optional(validators.is_valid_audio_format), default=None, ) spect_format: str = attr.field( converter=attr.converters.optional(str), - validator=attr.validators.optional(is_valid_spect_format), + validator=attr.validators.optional(validators.is_valid_spect_format), default=None, ) From dd1a6e6afe00cd1cc2a9e900cbb4443e0fbdc4a6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 18:57:37 -0500 Subject: [PATCH 054/150] Clean up names in docstrings in src/vak/datasets/frame_classification/metadata.py --- .../datasets/frame_classification/metadata.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/src/vak/datasets/frame_classification/metadata.py b/src/vak/datasets/frame_classification/metadata.py index d9f7c8f82..265bea293 100644 --- a/src/vak/datasets/frame_classification/metadata.py +++ b/src/vak/datasets/frame_classification/metadata.py @@ -1,6 +1,7 @@ """A dataclass that represents metadata -associated with a frame classification dataset, -as generated by +associated with a frame classification dataset. 
+ +Metadata is generated by :func:`vak.core.prep.frame_classification.prep_frame_classification_dataset`""" from __future__ import annotations @@ -16,8 +17,10 @@ @attr.define class Metadata: """A dataclass that represents metadata - associated with a dataset that was - generated by :func:`vak.core.prep.prep`. + associated with a frame classification dataset. + + Metadata is generated by + :func:`vak.core.prep.frame_classification.prep_frame_classification_dataset` Attributes ---------- @@ -83,7 +86,7 @@ def from_path(cls, json_path: str | pathlib.Path) -> Metadata: """Load dataset metadata from a json file. Class method that returns an instance of - :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata`. + :class:`~vak.datasets.frame_classification.Metadata`. Parameters ---------- @@ -94,8 +97,8 @@ def from_path(cls, json_path: str | pathlib.Path) -> Metadata: Returns ------- - metadata : vak.datasets.frame_classification.FrameClassificationDatatsetMetadata - Instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + metadata : vak.datasets.frame_classification.Metadata + Instance of :class:`~vak.datasets.frame_classification.Metadata` with metadata loaded from json file. """ json_path = pathlib.Path(json_path) @@ -124,7 +127,7 @@ def to_json(self, dataset_path: str | pathlib.Path) -> None: This method is called by :func:`vak.core.prep.prep` after it generates a dataset and then creates an - instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + instance of :class:`~vak.datasets.frame_classification.Metadata` with metadata about that dataset. 
Parameters From 1361f0396f257b9bc4305078f358b7d206783f77 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:08:07 -0500 Subject: [PATCH 055/150] Further fix docstrings + use datasets.validators in src/vak/datasets/parametric_umap/metadata.py --- src/vak/datasets/parametric_umap/metadata.py | 48 ++++---------------- 1 file changed, 10 insertions(+), 38 deletions(-) diff --git a/src/vak/datasets/parametric_umap/metadata.py b/src/vak/datasets/parametric_umap/metadata.py index 783c9bdfc..cfcc2483e 100644 --- a/src/vak/datasets/parametric_umap/metadata.py +++ b/src/vak/datasets/parametric_umap/metadata.py @@ -11,36 +11,7 @@ import attr - -def is_valid_dataset_csv_filename(instance, attribute, value): - valid = "_prep_" in value and value.endswith(".csv") - if not valid: - raise ValueError( - f"Invalid dataset csv filename: {value}." - f'Filename should contain the string "_prep_" ' - f"and end with the extension .csv." - f"Valid filenames are generated by " - f"vak.core.prep.generate_dataset_csv_filename" - ) - - -def is_valid_audio_format(instance, attribute, value): - import vak.common.constants - - if value not in vak.common.constants.VALID_AUDIO_FORMATS: - raise ValueError( - f"Not a valid audio format: {value}. Valid audio formats are: {vak.common.constants.VALID_AUDIO_FORMATS}" - ) - - -def is_valid_spect_format(instance, attribute, value): - import vak.common.constants - - if value not in vak.common.constants.VALID_SPECT_FORMATS: - raise ValueError( - f"Not a valid spectrogram format: {value}. " - f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}" - ) +from .. 
import validators @attr.define @@ -68,7 +39,7 @@ class Metadata: METADATA_JSON_FILENAME: ClassVar = "metadata.json" dataset_csv_filename: str = attr.field( - converter=str, validator=is_valid_dataset_csv_filename + converter=str, validator=validators.is_valid_dataset_csv_filename ) shape: tuple = attr.field(converter=tuple) @@ -86,7 +57,7 @@ def is_valid_shape(self, attribute, value): audio_format: str = attr.field( converter=attr.converters.optional(str), - validator=attr.validators.optional(is_valid_audio_format), + validator=attr.validators.optional(validators.is_valid_audio_format), default=None, ) @@ -95,7 +66,7 @@ def from_path(cls, json_path: str | pathlib.Path): """Load dataset metadata from a json file. Class method that returns an instance of - :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata`. + :class:`~vak.datasets.parametric_umap.Metadata`. Parameters ---------- @@ -106,8 +77,8 @@ def from_path(cls, json_path: str | pathlib.Path): Returns ------- - metadata : vak.datasets.frame_classification.FrameClassificationDatatsetMetadata - Instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + metadata : vak.datasets.parametric_umap.Metadata + Instance of :class:`~vak.datasets.parametric_umap.Metadata` with metadata loaded from json file. """ json_path = pathlib.Path(json_path) @@ -142,10 +113,11 @@ def to_json(self, dataset_path: str | pathlib.Path) -> None: Parameters ---------- dataset_path : string, pathlib.Path - Path to root of a directory representing a dataset - generated by :func:`vak.core.prep.prep`. - where 'metadata.json' file + Path where 'metadata.json' file should be saved. 
+ Typically the root of a directory representing a dataset + generated by + :func:`vak.core.prep.parametric_umap.prep_parametric_umap_dataset` """ dataset_path = pathlib.Path(dataset_path) if not dataset_path.exists() or not dataset_path.is_dir(): From 492809163395ad919bd04b227611a4d4a27f29ec Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:12:30 -0500 Subject: [PATCH 056/150] Further fixes to docstring in src/vak/datasets/frame_classification/metadata.py --- src/vak/datasets/frame_classification/metadata.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/src/vak/datasets/frame_classification/metadata.py b/src/vak/datasets/frame_classification/metadata.py index 265bea293..711bf556c 100644 --- a/src/vak/datasets/frame_classification/metadata.py +++ b/src/vak/datasets/frame_classification/metadata.py @@ -2,7 +2,7 @@ associated with a frame classification dataset. Metadata is generated by -:func:`vak.core.prep.frame_classification.prep_frame_classification_dataset`""" +:func:`vak.core.prep.frame_classification.prep_frame_classification_dataset`.""" from __future__ import annotations import json @@ -133,10 +133,11 @@ def to_json(self, dataset_path: str | pathlib.Path) -> None: Parameters ---------- dataset_path : string, pathlib.Path - Path to root of a directory representing a dataset - generated by :func:`vak.core.prep.prep`. - where 'metadata.json' file - should be saved. + Path where 'metadata.json' file + should be saved. Typically, + the root of a directory representing a dataset + generated by + :func:`vak.core.prep.frame_classification.prep_frame_classification_dataset`. 
""" dataset_path = pathlib.Path(dataset_path) if not dataset_path.exists() or not dataset_path.is_dir(): From 78cd1e62fb94a8c54f37d3b1a60cfdbb501548ce Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:12:47 -0500 Subject: [PATCH 057/150] Fix docstrings, use datasets.validators in src/vak/datasets/vae/metadata.py --- src/vak/datasets/vae/metadata.py | 59 +++++++++----------------------- 1 file changed, 16 insertions(+), 43 deletions(-) diff --git a/src/vak/datasets/vae/metadata.py b/src/vak/datasets/vae/metadata.py index c193b82f9..9d4aaca1c 100644 --- a/src/vak/datasets/vae/metadata.py +++ b/src/vak/datasets/vae/metadata.py @@ -1,7 +1,8 @@ """A dataclass that represents metadata -associated with a VAE dataset, -as generated by -:func:`vak.core.prep.frame_classification.prep_dimensionality_reduction_dataset`""" +associated with a VAE dataset. + +The metadata is generated by +:func:`vak.core.prep.vae.prep_vae_dataset`.""" from __future__ import annotations import json @@ -10,36 +11,7 @@ import attr - -def is_valid_dataset_csv_filename(instance, attribute, value): - valid = "_prep_" in value and value.endswith(".csv") - if not valid: - raise ValueError( - f"Invalid dataset csv filename: {value}." - f'Filename should contain the string "_prep_" ' - f"and end with the extension .csv." - f"Valid filenames are generated by " - f"vak.core.prep.generate_dataset_csv_filename" - ) - - -def is_valid_audio_format(instance, attribute, value): - import vak.common.constants - - if value not in vak.common.constants.VALID_AUDIO_FORMATS: - raise ValueError( - f"Not a valid audio format: {value}. Valid audio formats are: {vak.common.constants.VALID_AUDIO_FORMATS}" - ) - - -def is_valid_spect_format(instance, attribute, value): - import vak.common.constants - - if value not in vak.common.constants.VALID_SPECT_FORMATS: - raise ValueError( - f"Not a valid spectrogram format: {value}. 
" - f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}" - ) +from .. import validators @attr.define @@ -62,7 +34,7 @@ class Metadata: METADATA_JSON_FILENAME: ClassVar = "metadata.json" dataset_csv_filename: str = attr.field( - converter=str, validator=is_valid_dataset_csv_filename + converter=str, validator=validators.is_valid_dataset_csv_filename ) shape: tuple = attr.field(converter=tuple) @@ -80,7 +52,7 @@ def is_valid_shape(self, attribute, value): audio_format: str = attr.field( converter=attr.converters.optional(str), - validator=attr.validators.optional(is_valid_audio_format), + validator=attr.validators.optional(validators.is_valid_audio_format), default=None, ) @@ -89,7 +61,7 @@ def from_path(cls, json_path: str | pathlib.Path): """Load dataset metadata from a json file. Class method that returns an instance of - :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata`. + :class:`~vak.datasets.vae.Metadata`. Parameters ---------- @@ -100,8 +72,8 @@ def from_path(cls, json_path: str | pathlib.Path): Returns ------- - metadata : vak.datasets.frame_classification.FrameClassificationDatatsetMetadata - Instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + metadata : vak.datasets.vae.Metadata + Instance of :class:`~vak.datasets.vae.Metadata` with metadata loaded from json file. """ json_path = pathlib.Path(json_path) @@ -130,16 +102,17 @@ def to_json(self, dataset_path: str | pathlib.Path) -> None: This method is called by :func:`vak.core.prep.prep` after it generates a dataset and then creates an - instance of :class:`~vak.datasets.frame_classification.FrameClassificationDatatsetMetadata` + instance of :class:`~vak.datasets.vae.Metadata` with metadata about that dataset. Parameters ---------- dataset_path : string, pathlib.Path - Path to root of a directory representing a dataset - generated by :func:`vak.core.prep.prep`. - where 'metadata.json' file - should be saved. 
+ Path where 'metadata.json' file + should be saved. Typically, the root + of a directory representing a dataset + generated by + :func:`vak.core.prep.vae.prep_vae_dataset`. """ dataset_path = pathlib.Path(dataset_path) if not dataset_path.exists() or not dataset_path.is_dir(): From b215a775505210db22b33f4274f37869a0fa202c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:14:03 -0500 Subject: [PATCH 058/150] Import vae in src/vak/datasets/__init__.py --- src/vak/datasets/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/datasets/__init__.py b/src/vak/datasets/__init__.py index 0a8cc3764..8b73bdbd5 100644 --- a/src/vak/datasets/__init__.py +++ b/src/vak/datasets/__init__.py @@ -1,3 +1,3 @@ -from . import frame_classification, parametric_umap +from . import frame_classification, parametric_umap, vae -__all__ = ["frame_classification", "parametric_umap"] +__all__ = ["frame_classification", "parametric_umap", "vae"] From fd7a8dbe48a0e1f8358335a95cc2aebd5144cc9e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:24:40 -0500 Subject: [PATCH 059/150] Fix names in src/vak/prep/parametric_umap/parametric_umap.py --- src/vak/prep/parametric_umap/parametric_umap.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index b544c8458..9cb1d7cb6 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -158,13 +158,13 @@ def prep_parametric_umap_dataset( f"with ``purpose='{purpose}'." 
) - logger.info(f"Purpose for frame classification dataset: {purpose}") + logger.info(f"Purpose for parametric UMAP dataset: {purpose}") # ---- set up directory that will contain dataset, and csv file name ----------------------------------------------- data_dir_name = data_dir.name timenow = get_timenow_as_str() dataset_path = ( output_dir - / f"{data_dir_name}-vak-dimensionality-reduction-dataset-generated-{timenow}" + / f"{data_dir_name}-vak-parametric-UMAP-dataset-generated-{timenow}" ) dataset_path.mkdir() From 0b2d7f9c10f1d0c5651a39af17739a8966c70b61 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:25:07 -0500 Subject: [PATCH 060/150] WIP: Add src/vak/prep/vae/vae.py --- src/vak/prep/vae/__init__.py | 1 + src/vak/prep/vae/vae.py | 356 +++++++++++++++++++++++++++++++++++ 2 files changed, 357 insertions(+) create mode 100644 src/vak/prep/vae/__init__.py diff --git a/src/vak/prep/vae/__init__.py b/src/vak/prep/vae/__init__.py new file mode 100644 index 000000000..3fcb3a6f6 --- /dev/null +++ b/src/vak/prep/vae/__init__.py @@ -0,0 +1 @@ +from .vae import prep_vae_dataset diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index e69de29bb..651229f62 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -0,0 +1,356 @@ +"""Prepare datasets for parametric UMAP models.""" +from __future__ import annotations + +import json +import logging +import pathlib +import warnings + +import crowsetta + +from ... import datasets +from ...common import labels +from ...common.converters import expanded_user_path, labelset_to_set +from ...common.logging import config_logging_for_cli, log_version +from ...common.timenow import get_timenow_as_str +from .. import dataset_df_helper, split +from ..unit_dataset import prep_unit_dataset +from . 
import dataset_arrays + +logger = logging.getLogger(__name__) + + +VAE_DATASET_TYPES = { + "segment-vae", "window-vae" +} + + +def prep_vae_dataset( + data_dir: str | pathlib.Path, + purpose: str, + dataset_type: str, + output_dir: str | pathlib.Path | None = None, + audio_format: str | None = None, + spect_params: dict | None = None, + annot_format: str | None = None, + annot_file: str | pathlib.Path | None = None, + labelset: set | None = None, + context_s: float = 0.015, + train_dur: int | None = None, + val_dur: int | None = None, + test_dur: int | None = None, + train_set_durs: list[float] | None = None, + num_replicates: int | None = None, + spect_key: str = "s", + timebins_key: str = "t", +): + """Prepare datasets for parametric UMAP models. + + For general information on dataset preparation, + see the docstring for :func:`vak.prep.prep`. + + Parameters + ---------- + data_dir : str, Path + Path to directory with files from which to make dataset. + purpose : str + Purpose of the dataset. + One of {'train', 'eval', 'predict', 'learncurve'}. + These correspond to commands of the vak command-line interface. + dataset_type : str + Type of VAE dataset. One of {"segment-vae", "window-vae"}. + output_dir : str + Path to location where data sets should be saved. + Default is ``None``, in which case it defaults to ``data_dir``. + audio_format : str + Format of audio files. One of {'wav', 'cbin'}. + Default is ``None``, but either ``audio_format`` or ``spect_format`` + must be specified. + spect_params : dict, vak.config.SpectParams + Parameters for creating spectrograms. Default is ``None``. + annot_format : str + Format of annotations. Any format that can be used with the + :module:`crowsetta` library is valid. Default is ``None``. + labelset : str, list, set + Set of unique labels for vocalizations. Strings or integers. + Default is ``None``. If not ``None``, then files will be skipped + where the associated annotation + contains labels not found in ``labelset``. 
+ ``labelset`` is converted to a Python ``set`` using + :func:`vak.converters.labelset_to_set`. + See help for that function for details on how to specify ``labelset``. + train_dur : float + Total duration of training set, in seconds. + When creating a learning curve, + training subsets of shorter duration + will be drawn from this set. Default is None. + val_dur : float + Total duration of validation set, in seconds. + Default is None. + test_dur : float + Total duration of test set, in seconds. + Default is None. + train_set_durs : list + of int, durations in seconds of subsets taken from training data + to create a learning curve, e.g. [5, 10, 15, 20]. + num_replicates : int + number of times to replicate training for each training set duration + to better estimate metrics for a training set of that size. + Each replicate uses a different randomly drawn subset of the training + data (but of the same duration). + spect_key : str + key for accessing spectrogram in files. Default is 's'. + timebins_key : str + key for accessing vector of time bins in files. Default is 't'. + + Returns + ------- + dataset_df : pandas.DataFrame + That represents a dataset. + dataset_path : pathlib.Path + Path to csv saved from ``dataset_df``. + """ + from .. 
import constants # avoid circular import + + # pre-conditions --------------------------------------------------------------------------------------------------- + if purpose not in constants.VALID_PURPOSES: + raise ValueError( + f"purpose must be one of: {constants.VALID_PURPOSES}\n" + f"Value for purpose was: {purpose}" + ) + + if dataset_type not in VAE_DATASET_TYPES: + raise ValueError( + f"`dataset_type` must be one of '{VAE_DATASET_TYPES}', but was: {dataset_type}" + ) + + if labelset is not None: + labelset = labelset_to_set(labelset) + + data_dir = expanded_user_path(data_dir) + if not data_dir.is_dir(): + raise NotADirectoryError( + f"Path specified for ``data_dir`` not found: {data_dir}" + ) + + if output_dir: + output_dir = expanded_user_path(output_dir) + else: + output_dir = data_dir + + if not output_dir.is_dir(): + raise NotADirectoryError( + f"Path specified for ``output_dir`` not found: {output_dir}" + ) + + if annot_file is not None: + annot_file = expanded_user_path(annot_file) + if not annot_file.exists(): + raise FileNotFoundError( + f"Path specified for ``annot_file`` not found: {annot_file}" + ) + + if purpose == "predict": + if labelset is not None: + warnings.warn( + "The ``purpose`` argument was set to 'predict`, but a ``labelset`` was provided." + "This would cause an error because the ``prep_spectrogram_dataset`` section will attempt to " + "check whether the files in the ``data_dir`` have labels in " + "``labelset``, even though those files don't have annotation.\n" + "Setting ``labelset`` to None." + ) + labelset = None + else: # if purpose is not predict + if labelset is None: + raise ValueError( + f"The ``purpose`` argument was set to '{purpose}', but no ``labelset`` was provided." + "This will cause an error when trying to split the dataset, " + "e.g. into training and test splits, " + "or a silent error, e.g. when calculating metrics with an evaluation set. 
" + "Please specify a ``labelset`` when calling ``vak.prep.frame_classification.prep`` " + f"with ``purpose='{purpose}'." + ) + + logger.info(f"Purpose for frame classification dataset: {purpose}") + # ---- set up directory that will contain dataset, and csv file name ----------------------------------------------- + data_dir_name = data_dir.name + timenow = get_timenow_as_str() + dataset_path = ( + output_dir + / f"{data_dir_name}-vak-dimensionality-reduction-dataset-generated-{timenow}" + ) + dataset_path.mkdir() + + if annot_file and annot_format == "birdsong-recognition-dataset": + # we do this normalization / canonicalization after we make dataset_path + # so that we can put the new annot_file inside of dataset_path, instead of + # making new files elsewhere on a user's system + logger.info( + "The ``annot_format`` argument was set to 'birdsong-recognition-format'; " + "this format requires the audio files for their sampling rate " + "to convert onset and offset times of birdsong syllables to seconds." + "Converting this format to 'generic-seq' now with the times in seconds, " + "so that the dataset prepared by vak will not require the audio files." + ) + birdsongrec = crowsetta.formats.seq.BirdsongRec.from_file(annot_file) + annots = birdsongrec.to_annot() + # note we point `annot_file` at a new file we're about to make + annot_file = ( + dataset_path / f"{annot_file.stem}.converted-to-generic-seq.csv" + ) + # and we remake Annotations here so that annot_path points to this new file, not the birdsong-rec Annotation.xml + annots = [ + crowsetta.Annotation( + seq=annot.seq, + annot_path=annot_file, + notated_path=annot.notated_path, + ) + for annot in annots + ] + generic_seq = crowsetta.formats.seq.GenericSeq(annots=annots) + generic_seq.to_file(annot_file) + # and we now change `annot_format` as well. 
Both these will get passed to io.prep_spectrogram_dataset + annot_format = "generic-seq" + + # NOTE we set up logging here (instead of cli) so the prep log is included in the dataset + config_logging_for_cli( + log_dst=dataset_path, log_stem="prep", level="INFO", force=True + ) + log_version(logger) + + dataset_csv_path = dataset_df_helper.get_dataset_csv_path( + dataset_path, data_dir_name, timenow + ) + logger.info(f"Will prepare dataset as directory: {dataset_path}") + + # ---- actually make the dataset ----------------------------------------------------------------------------------- + dataset_df, shape = prep_unit_dataset( + audio_format=audio_format, + output_dir=dataset_path, + spect_params=spect_params, + data_dir=data_dir, + annot_format=annot_format, + annot_file=annot_file, + labelset=labelset, + context_s=context_s, + ) + + if dataset_df.empty: + raise ValueError( + "Calling `vak.prep.unit_dataset.prep_unit_dataset` " + "with arguments passed to `vak.core.prep.prep_dimensionality_reduction_dataset` " + "returned an empty dataframe.\n" + "Please double-check arguments to `vak.core.prep` function." + ) + + # save before (possibly) splitting, just in case duration args are not valid + # (we can't know until we make dataset) + dataset_df.to_csv(dataset_csv_path) + + # ---- (possibly) split into train / val / test sets --------------------------------------------- + # catch case where user specified duration for just training set, raise a helpful error instead of failing silently + if (purpose == "train" or purpose == "learncurve") and ( + (train_dur is not None and train_dur > 0) + and (val_dur is None or val_dur == 0) + and (test_dur is None or val_dur == 0) + ): + raise ValueError( + "A duration specified for just training set, but prep function does not currently support creating a " + "single split of a specified duration. 
Either remove the train_dur option from the prep section and " + "rerun, in which case all data will be included in the training set, or specify values greater than " + "zero for test_dur (and val_dur, if a validation set will be used)" + ) + + if all( + [dur is None for dur in (train_dur, val_dur, test_dur)] + ) or purpose in ( + "eval", + "predict", + ): + # then we're not going to split + logger.info("Will not split dataset.") + do_split = False + else: + if val_dur is not None and train_dur is None and test_dur is None: + raise ValueError( + "cannot specify only val_dur, unclear how to split dataset into training and test sets" + ) + else: + logger.info("Will split dataset.") + do_split = True + + if do_split: + dataset_df = split.unit_dataframe( + dataset_df, + dataset_path, + labelset=labelset, + train_dur=train_dur, + val_dur=val_dur, + test_dur=test_dur, + ) + + elif ( + do_split is False + ): # add a split column, but assign everything to the same 'split' + # ideally we would just say split=purpose in call to add_split_col, but + # we have to special case, because "eval" looks for a 'test' split (not an "eval" split) + if purpose == "eval": + split_name = ( + "test" # 'split_name' to avoid name clash with split package + ) + elif purpose == "predict": + split_name = "predict" + + dataset_df = dataset_df_helper.add_split_col( + dataset_df, split=split_name + ) + + # ---- create and save labelmap ------------------------------------------------------------------------------------ + # we do this before creating array files since we need to load the labelmap to make frame label vectors + if purpose != "predict": + # TODO: add option to generate predict using existing dataset, so we can get labelmap from it + labelmap = labels.to_map(labelset, map_unlabeled=False) + logger.info( + f"Number of classes in labelmap: {len(labelmap)}", + ) + # save labelmap in case we need it later + with (dataset_path / "labelmap.json").open("w") as fp: + json.dump(labelmap, fp) + 
else: + labelmap = None + + # ---- make arrays that represent final dataset -------------------------------------------------------------------- + dataset_arrays.move_files_into_split_subdirs( + dataset_df, + dataset_path, + purpose, + ) + # + # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- + # if purpose == 'learncurve': + # dataset_df = make_learncurve_splits_from_dataset_df( + # dataset_df, + # train_set_durs, + # num_replicates, + # dataset_path, + # labelmap, + # audio_format, + # spect_key, + # timebins_key, + # ) + + # ---- save csv file that captures provenance of source data ------------------------------------------------------- + logger.info(f"Saving dataset csv file: {dataset_csv_path}") + dataset_df.to_csv( + dataset_csv_path, index=False + ) # index is False to avoid having "Unnamed: 0" column when loading + + # ---- save metadata ----------------------------------------------------------------------------------------------- + metadata = datasets.parametric_umap.Metadata( + dataset_csv_filename=str(dataset_csv_path.name), + audio_format=audio_format, + shape=shape, + ) + metadata.to_json(dataset_path) + + return dataset_df, dataset_path From 4a0123c2127e7b4e98a20ab98e2a93fd22d8fbf3 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:25:26 -0500 Subject: [PATCH 061/150] Import vae module in src/vak/prep/__init__.py --- src/vak/prep/__init__.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vak/prep/__init__.py b/src/vak/prep/__init__.py index 47c378daf..492da4dc6 100644 --- a/src/vak/prep/__init__.py +++ b/src/vak/prep/__init__.py @@ -8,6 +8,7 @@ sequence_dataset, spectrogram_dataset, unit_dataset, + vae, ) from .prep_ import prep @@ -22,4 +23,5 @@ "sequence_dataset", "spectrogram_dataset", "unit_dataset", + "vae", ] From 8ec3b1eb19b454e7ff28cf3219ae384e9b980c5d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:26:09 -0500 Subject: 
[PATCH 062/150] Add vae-window and vae-segment to DATASET_TYPE_FUNCTION_MAP in vae.prep_vae_dataset --- src/vak/prep/constants.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/constants.py b/src/vak/prep/constants.py index 68399dd4c..bc72686ee 100644 --- a/src/vak/prep/constants.py +++ b/src/vak/prep/constants.py @@ -2,7 +2,7 @@ Defined in a separate module to minimize circular imports. """ -from . import frame_classification, parametric_umap +from . import frame_classification, parametric_umap, vae VALID_PURPOSES = frozenset( [ @@ -18,6 +18,8 @@ DATASET_TYPE_FUNCTION_MAP = { "frame classification": frame_classification.prep_frame_classification_dataset, "parametric umap": parametric_umap.prep_parametric_umap_dataset, + "vae-window": vae.prep_vae_dataset, + "vae-segment": vae.prep_vae_dataset, } DATASET_TYPES = tuple(DATASET_TYPE_FUNCTION_MAP.keys()) From 9557a98644f97a0e0768872881241da15a2a0dac Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 19:26:37 -0500 Subject: [PATCH 063/150] WIP: Add vae to src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index a99e287d4..6fbacab99 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -6,6 +6,7 @@ from . 
import constants from .frame_classification import prep_frame_classification_dataset from .parametric_umap import prep_parametric_umap_dataset +from .vae import prep_vae_dataset logger = logging.getLogger(__name__) @@ -232,6 +233,26 @@ def prep( timebins_key=timebins_key, ) return dataset_df, dataset_path + elif dataset_type == "vae": + dataset_df, dataset_path = prep_parametric_umap_dataset( + data_dir, + purpose, + output_dir, + audio_format, + spect_params, + annot_format, + annot_file, + labelset, + context_s, + train_dur, + val_dur, + test_dur, + train_set_durs, + num_replicates, + spect_key=spect_key, + timebins_key=timebins_key, + ) + return dataset_df, dataset_path else: # this is in case a dataset type is written wrong # in the if-else statements above, we want to error loudly From 5056f380c3c9a3c09749c8c533035ce66e23285e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:22:22 -0500 Subject: [PATCH 064/150] Add imports in src/vak/datasets/vae/__init__.py --- src/vak/datasets/vae/__init__.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/src/vak/datasets/vae/__init__.py b/src/vak/datasets/vae/__init__.py index 133b3cd80..340bda9ae 100644 --- a/src/vak/datasets/vae/__init__.py +++ b/src/vak/datasets/vae/__init__.py @@ -1 +1,10 @@ +from .metadata import Metadata from .segment_dataset import SegmentDataset +from .window_dataset import WindowDataset + + +__all__ = [ + "Metadata", + "SegmentDataset", + "WindowDataset", +] From 7e837d7c6f840cc5bcdcab5dfff96171ec32dd5f Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:23:01 -0500 Subject: [PATCH 065/150] Fix up src/vak/datasets/vae/window_dataset.py --- src/vak/datasets/vae/window_dataset.py | 45 +++++++++----------------- 1 file changed, 16 insertions(+), 29 deletions(-) diff --git a/src/vak/datasets/vae/window_dataset.py b/src/vak/datasets/vae/window_dataset.py index 0d9309df7..ad2add327 100644 --- a/src/vak/datasets/vae/window_dataset.py +++ 
b/src/vak/datasets/vae/window_dataset.py @@ -1,4 +1,8 @@ -"""Dataset class used for training VAE models on fixed-sized windows, such as a "shotgun VAE" [1]_.""" +"""Dataset class used for training VAE models on fixed-sized windows, such as a "shotgun VAE" [1]_. + +.. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855""" from __future__ import annotations import pathlib @@ -8,16 +12,12 @@ import numpy.typing as npt import pandas as pd -from . import constants, helper +from ..frame_classification import constants, helper from .metadata import Metadata - - - class WindowDataset: - """Dataset class used for training VAE models on fixed-sized windows, - such as a "shotgun VAE" [1]_. + """Dataset class used for training VAE models on fixed-sized windows, such as a "shotgun VAE" [1]_. Attributes ---------- @@ -73,6 +73,12 @@ class WindowDataset: transform : callable The transform applied to the frames, the input to the neural network :math:`x`. + + References + ---------- + .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. 
https://doi.org/10.7554/eLife.67855 """ def __init__( @@ -173,7 +179,6 @@ def __init__( ) self.window_inds = window_inds self.transform = transform - self.target_transform = target_transform @property def duration(self): @@ -208,9 +213,6 @@ def __getitem__(self, idx): sample_id = uniq_sample_ids[0] frames_path = self.dataset_path / self.frames_paths[sample_id] frames = self._load_frames(frames_path) - frame_labels = np.load( - self.dataset_path / self.frame_labels_paths[sample_id] - ) elif len(uniq_sample_ids) > 1: frames = [] @@ -218,11 +220,6 @@ def __getitem__(self, idx): for sample_id in sorted(uniq_sample_ids): frames_path = self.dataset_path / self.frames_paths[sample_id] frames.append(self._load_frames(frames_path)) - frame_labels.append( - np.load( - self.dataset_path / self.frame_labels_paths[sample_id] - ) - ) if all([frames_.ndim == 1 for frames_ in frames]): # --> all 1-d audio vectors; if we specify `axis=1` here we'd get error @@ -240,15 +237,10 @@ def __getitem__(self, idx): ..., inds_in_sample : inds_in_sample + self.window_size, # noqa: E203 ] - frame_labels = frame_labels[ - inds_in_sample : inds_in_sample + self.window_size # noqa: E203 - ] if self.transform: frames = self.transform(frames) - if self.target_transform: - frame_labels = self.target_transform(frame_labels) - return frames, frame_labels + return frames def __len__(self): """number of batches""" @@ -263,10 +255,9 @@ def from_dataset_path( split: str = "train", subset: str | None = None, transform: Callable | None = None, - target_transform: Callable | None = None, ): """Make a :class:`WindowDataset` instance, - given the path to a frame classification dataset. + given the path to a VAE window dataset. Parameters ---------- @@ -294,13 +285,10 @@ def from_dataset_path( for use when generating a learning curve. transform : callable The transform applied to the input to the neural network :math:`x`. 
- target_transform : callable - The transform applied to the target for the output - of the neural network :math:`y`. Returns ------- - dataset : vak.datasets.frame_classification.WindowDataset + dataset : vak.datasets.vae.WindowDataset """ dataset_path = pathlib.Path(dataset_path) metadata = Metadata.from_dataset_path(dataset_path) @@ -350,5 +338,4 @@ def from_dataset_path( subset, window_inds, transform, - target_transform, ) From d01e1dd60127ebabc079969d10ff0d91fdd34931 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:24:25 -0500 Subject: [PATCH 066/150] Fix call to prep_vae_dataset in src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index 6fbacab99..d6b45190b 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -234,9 +234,10 @@ def prep( ) return dataset_df, dataset_path elif dataset_type == "vae": - dataset_df, dataset_path = prep_parametric_umap_dataset( + dataset_df, dataset_path = prep_vae_dataset( data_dir, purpose, + dataset_type, output_dir, audio_format, spect_params, From 980bf83af84cc35cf4ebd1d6d44699f1c0f184b4 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:52:17 -0500 Subject: [PATCH 067/150] WIP: Add src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 148 ++++++++++++++++++++++++++++++++ 1 file changed, 148 insertions(+) create mode 100644 src/vak/prep/vae/segment_vae.py diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py new file mode 100644 index 000000000..b4c453996 --- /dev/null +++ b/src/vak/prep/vae/segment_vae.py @@ -0,0 +1,148 @@ +"""""" +from __future__ import annotations + +import json +import logging +import pathlib + +from ...common import labels +from .. 
import dataset_df_helper, split +from ..unit_dataset import prep_unit_dataset +from ..parametric_umap import dataset_arrays + + +logger = logging.getLogger(__name__) + + +def prep_segment_vae_dataset( + data_dir: str | pathlib.Path, + dataset_path: str | pathlib.Path, + dataset_csv_path: str | pathlib.Path, + purpose: str, + audio_format: str | None = None, + spect_params: dict | None = None, + annot_format: str | None = None, + annot_file: str | pathlib.Path | None = None, + labelset: set | None = None, + context_s: float = 0.015, + train_dur: int | None = None, + val_dur: int | None = None, + test_dur: int | None = None, + train_set_durs: list[float] | None = None, + num_replicates: int | None = None, + spect_key: str = "s", + timebins_key: str = "t", +): + dataset_df, shape = prep_unit_dataset( + audio_format=audio_format, + output_dir=dataset_path, + spect_params=spect_params, + data_dir=data_dir, + annot_format=annot_format, + annot_file=annot_file, + labelset=labelset, + context_s=context_s, + ) + if dataset_df.empty: + raise ValueError( + "Calling `vak.prep.unit_dataset.prep_unit_dataset` " + "with arguments passed to `vak.core.prep.prep_dimensionality_reduction_dataset` " + "returned an empty dataframe.\n" + "Please double-check arguments to `vak.core.prep` function." 
+ ) + + # save before (possibly) splitting, just in case duration args are not valid + # (we can't know until we make dataset) + dataset_df.to_csv(dataset_csv_path) + + # ---- (possibly) split into train / val / test sets --------------------------------------------- + # catch case where user specified duration for just training set, raise a helpful error instead of failing silently + if (purpose == "train" or purpose == "learncurve") and ( + (train_dur is not None and train_dur > 0) + and (val_dur is None or val_dur == 0) + and (test_dur is None or val_dur == 0) + ): + raise ValueError( + "A duration specified for just training set, but prep function does not currently support creating a " + "single split of a specified duration. Either remove the train_dur option from the prep section and " + "rerun, in which case all data will be included in the training set, or specify values greater than " + "zero for test_dur (and val_dur, if a validation set will be used)" + ) + + if all( + [dur is None for dur in (train_dur, val_dur, test_dur)] + ) or purpose in ( + "eval", + "predict", + ): + # then we're not going to split + logger.info("Will not split dataset.") + do_split = False + else: + if val_dur is not None and train_dur is None and test_dur is None: + raise ValueError( + "cannot specify only val_dur, unclear how to split dataset into training and test sets" + ) + else: + logger.info("Will split dataset.") + do_split = True + + if do_split: + dataset_df = split.unit_dataframe( + dataset_df, + dataset_path, + labelset=labelset, + train_dur=train_dur, + val_dur=val_dur, + test_dur=test_dur, + ) + + elif ( + do_split is False + ): # add a split column, but assign everything to the same 'split' + # ideally we would just say split=purpose in call to add_split_col, but + # we have to special case, because "eval" looks for a 'test' split (not an "eval" split) + if purpose == "eval": + split_name = ( + "test" # 'split_name' to avoid name clash with split package + ) + elif 
purpose == "predict": + split_name = "predict" + + dataset_df = dataset_df_helper.add_split_col( + dataset_df, split=split_name + ) + + # ---- create and save labelmap ------------------------------------------------------------------------------------ + # we do this before creating array files since we need to load the labelmap to make frame label vectors + if purpose != "predict": + # TODO: add option to generate predict using existing dataset, so we can get labelmap from it + labelmap = labels.to_map(labelset, map_unlabeled=False) + logger.info( + f"Number of classes in labelmap: {len(labelmap)}", + ) + # save labelmap in case we need it later + with (dataset_path / "labelmap.json").open("w") as fp: + json.dump(labelmap, fp) + else: + labelmap = None + + # ---- make arrays that represent final dataset -------------------------------------------------------------------- + dataset_arrays.move_files_into_split_subdirs( + dataset_df, + dataset_path, + purpose, + ) + # + # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- + # if purpose == 'learncurve': + # dataset_df = make_learncurve_splits_from_dataset_df( + # dataset_df, + # train_set_durs, + # num_replicates, + # dataset_path, + # labelmap, + # audio_format, + # spect_key, + # timebins_key, + # ) From 136ea660333729faca2da67f1e948b2358e1bfef Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:52:24 -0500 Subject: [PATCH 068/150] WIP: Add src/vak/prep/vae/window_vae.py --- src/vak/prep/vae/window_vae.py | 111 +++++++++++++++++++++++++++++++++ 1 file changed, 111 insertions(+) create mode 100644 src/vak/prep/vae/window_vae.py diff --git a/src/vak/prep/vae/window_vae.py b/src/vak/prep/vae/window_vae.py new file mode 100644 index 000000000..277f37160 --- /dev/null +++ b/src/vak/prep/vae/window_vae.py @@ -0,0 +1,111 @@ +from __future__ import annotations + +import json +import logging +import pathlib + +import pandas as pd + +from 
...common import labels +from ..spectrogram_dataset import prep_spectrogram_dataset +from ..frame_classification.assign_samples_to_splits import assign_samples_to_splits + + +logger = logging.getLogger(__name__) + + +def prep_window_vae_dataset( + data_dir: str | pathlib.Path, + dataset_path: str | pathlib.Path, + dataset_csv_path: str | pathlib.Path, + purpose: str, + audio_format: str | None = None, + spect_params: dict | None = None, + annot_format: str | None = None, + annot_file: str | pathlib.Path | None = None, + labelset: set | None = None, + train_dur: int | None = None, + val_dur: int | None = None, + test_dur: int | None = None, + train_set_durs: list[float] | None = None, + num_replicates: int | None = None, + spect_key: str = "s", + timebins_key: str = "t", +): + source_files_df = prep_spectrogram_dataset( + data_dir, + annot_format, + labelset, + annot_file, + audio_format, + spect_format, + spect_params, + spect_output_dir, + audio_dask_bag_kwargs, + ) + + # save before (possibly) splitting, just in case duration args are not valid + # (we can't know until we make dataset) + source_files_df.to_csv(dataset_csv_path) + + # ---- assign samples to splits; adds a 'split' column to dataset_df, calling `vak.prep.split` if needed ----------- + # once we assign a split, we consider this the ``dataset_df`` + dataset_df: pd.DataFrame = assign_samples_to_splits( + purpose, + source_files_df, + dataset_path, + train_dur, + val_dur, + test_dur, + labelset, + ) + + # ---- create and save labelmap ------------------------------------------------------------------------------------ + # we do this before creating array files since we need to load the labelmap to make frame label vectors + if purpose != "predict": + # TODO: add option to generate predict using existing dataset, so we can get labelmap from it + map_unlabeled_segments = sequence_dataset.has_unlabeled_segments( + dataset_df + ) + labelmap = labels.to_map( + labelset, map_unlabeled=map_unlabeled_segments + 
) + logger.info( + f"Number of classes in labelmap: {len(labelmap)}", + ) + # save labelmap in case we need it later + with (dataset_path / "labelmap.json").open("w") as fp: + json.dump(labelmap, fp) + else: + labelmap = None + + # ---- actually move/copy/create files into directories representing splits ---------------------------------------- + # now we're *remaking* the dataset_df (actually adding additional rows with the splits) + dataset_df: pd.DataFrame = make_splits( + dataset_df, + dataset_path, + input_type, + purpose, + labelmap, + audio_format, + spect_key, + timebins_key, + freqbins_key, + ) + + # ---- if purpose is learncurve, additionally prep training data subsets for the learning curve -------------------- + if purpose == "learncurve": + dataset_df: pd.DataFrame = make_subsets_from_dataset_df( + dataset_df, + input_type, + train_set_durs, + num_replicates, + dataset_path, + labelmap, + ) + + # ---- save csv file that captures provenance of source data ------------------------------------------------------- + logger.info(f"Saving dataset csv file: {dataset_csv_path}") + dataset_df.to_csv( + dataset_csv_path, index=False + ) # index is False to avoid having "Unnamed: 0" column when loading From 169bdb77811ac7aa6a6313e3f6535353f7cb3915 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:52:42 -0500 Subject: [PATCH 069/150] Rewrite prep_vae_dataset to call prep_segment_vae_dataset or prep_window_vae_dataset --- src/vak/prep/vae/vae.py | 155 +++++++++++----------------------------- 1 file changed, 40 insertions(+), 115 deletions(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index 651229f62..602edec03 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -1,4 +1,4 @@ -"""Prepare datasets for parametric UMAP models.""" +"""Prepare datasets for VAE models.""" from __future__ import annotations import json @@ -7,6 +7,7 @@ import warnings import crowsetta +import pandas as pd from ... 
import datasets from ...common import labels @@ -15,7 +16,10 @@ from ...common.timenow import get_timenow_as_str from .. import dataset_df_helper, split from ..unit_dataset import prep_unit_dataset -from . import dataset_arrays +from ..parametric_umap import dataset_arrays +from ..spectrogram_dataset import prep_spectrogram_dataset +from ..frame_classification.assign_samples_to_splits import assign_samples_to_splits + logger = logging.getLogger(__name__) @@ -31,10 +35,12 @@ def prep_vae_dataset( dataset_type: str, output_dir: str | pathlib.Path | None = None, audio_format: str | None = None, + spect_format: str | None = None, spect_params: dict | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, + audio_dask_bag_kwargs: dict | None = None, context_s: float = 0.015, train_dur: int | None = None, val_dur: int | None = None, @@ -44,7 +50,7 @@ def prep_vae_dataset( spect_key: str = "s", timebins_key: str = "t", ): - """Prepare datasets for parametric UMAP models. + """Prepare datasets for VAE models. For general information on dataset preparation, see the docstring for :func:`vak.prep.prep`. @@ -66,6 +72,9 @@ def prep_vae_dataset( Format of audio files. One of {'wav', 'cbin'}. Default is ``None``, but either ``audio_format`` or ``spect_format`` must be specified. + spect_format : str + Format of files containing spectrograms as 2-d matrices. One of {'mat', 'npz'}. + Default is None, but either audio_format or spect_format must be specified. spect_params : dict, vak.config.SpectParams Parameters for creating spectrograms. Default is ``None``. annot_format : str @@ -79,6 +88,14 @@ def prep_vae_dataset( ``labelset`` is converted to a Python ``set`` using :func:`vak.converters.labelset_to_set`. See help for that function for details on how to specify ``labelset``. 
+ audio_dask_bag_kwargs : dict + Keyword arguments used when calling :func:`dask.bag.from_sequence` + inside :func:`vak.io.audio`, where it is used to parallelize + the conversion of audio files into spectrograms. + Option should be specified in config.toml file as an inline table, + e.g., ``audio_dask_bag_kwargs = { npartitions = 20 }``. + Allows for finer-grained control + when needed to process files of different sizes. train_dur : float Total duration of training set, in seconds. When creating a learning curve, @@ -224,120 +241,28 @@ def prep_vae_dataset( logger.info(f"Will prepare dataset as directory: {dataset_path}") # ---- actually make the dataset ----------------------------------------------------------------------------------- - dataset_df, shape = prep_unit_dataset( - audio_format=audio_format, - output_dir=dataset_path, - spect_params=spect_params, - data_dir=data_dir, - annot_format=annot_format, - annot_file=annot_file, - labelset=labelset, - context_s=context_s, - ) - - if dataset_df.empty: - raise ValueError( - "Calling `vak.prep.unit_dataset.prep_unit_dataset` " - "with arguments passed to `vak.core.prep.prep_dimensionality_reduction_dataset` " - "returned an empty dataframe.\n" - "Please double-check arguments to `vak.core.prep` function." 
- ) - - # save before (possibly) splitting, just in case duration args are not valid - # (we can't know until we make dataset) - dataset_df.to_csv(dataset_csv_path) - - # ---- (possibly) split into train / val / test sets --------------------------------------------- - # catch case where user specified duration for just training set, raise a helpful error instead of failing silently - if (purpose == "train" or purpose == "learncurve") and ( - (train_dur is not None and train_dur > 0) - and (val_dur is None or val_dur == 0) - and (test_dur is None or val_dur == 0) - ): - raise ValueError( - "A duration specified for just training set, but prep function does not currently support creating a " - "single split of a specified duration. Either remove the train_dur option from the prep section and " - "rerun, in which case all data will be included in the training set, or specify values greater than " - "zero for test_dur (and val_dur, if a validation set will be used)" - ) - - if all( - [dur is None for dur in (train_dur, val_dur, test_dur)] - ) or purpose in ( - "eval", - "predict", - ): - # then we're not going to split - logger.info("Will not split dataset.") - do_split = False - else: - if val_dur is not None and train_dur is None and test_dur is None: - raise ValueError( - "cannot specify only val_dur, unclear how to split dataset into training and test sets" - ) - else: - logger.info("Will split dataset.") - do_split = True - - if do_split: - dataset_df = split.unit_dataframe( - dataset_df, + if dataset_type == 'segment-vae': + prep_segment_vae_dataset( + data_dir, dataset_path, - labelset=labelset, - train_dur=train_dur, - val_dur=val_dur, - test_dur=test_dur, - ) - - elif ( - do_split is False - ): # add a split column, but assign everything to the same 'split' - # ideally we would just say split=purpose in call to add_split_col, but - # we have to special case, because "eval" looks for a 'test' split (not an "eval" split) - if purpose == "eval": - split_name = ( 
- "test" # 'split_name' to avoid name clash with split package - ) - elif purpose == "predict": - split_name = "predict" - - dataset_df = dataset_df_helper.add_split_col( - dataset_df, split=split_name - ) - - # ---- create and save labelmap ------------------------------------------------------------------------------------ - # we do this before creating array files since we need to load the labelmap to make frame label vectors - if purpose != "predict": - # TODO: add option to generate predict using existing dataset, so we can get labelmap from it - labelmap = labels.to_map(labelset, map_unlabeled=False) - logger.info( - f"Number of classes in labelmap: {len(labelmap)}", + dataset_csv_path, + purpose, + audio_format, + spect_params, + annot_format, + annot_file, + labelset, + context_s, + train_dur, + val_dur, + test_dur, + train_set_durs, + num_replicates, + spect_key, + timebins_key, ) - # save labelmap in case we need it later - with (dataset_path / "labelmap.json").open("w") as fp: - json.dump(labelmap, fp) - else: - labelmap = None + elif dataset_type == 'window-vae': - # ---- make arrays that represent final dataset -------------------------------------------------------------------- - dataset_arrays.move_files_into_split_subdirs( - dataset_df, - dataset_path, - purpose, - ) - # - # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- - # if purpose == 'learncurve': - # dataset_df = make_learncurve_splits_from_dataset_df( - # dataset_df, - # train_set_durs, - # num_replicates, - # dataset_path, - # labelmap, - # audio_format, - # spect_key, - # timebins_key, - # ) # ---- save csv file that captures provenance of source data ------------------------------------------------------- logger.info(f"Saving dataset csv file: {dataset_csv_path}") @@ -346,7 +271,7 @@ def prep_vae_dataset( ) # index is False to avoid having "Unnamed: 0" column when loading # ---- save metadata 
----------------------------------------------------------------------------------------------- - metadata = datasets.parametric_umap.Metadata( + metadata = datasets.vae.Metadata( dataset_csv_filename=str(dataset_csv_path.name), audio_format=audio_format, shape=shape, From ce1782883551f583839fddbc74442d7a12a9e074 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:56:03 -0500 Subject: [PATCH 070/150] Fixing up src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 32 +++++++++++++++++++++++++++++++- 1 file changed, 31 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index b4c453996..350dcaad5 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -5,6 +5,8 @@ import logging import pathlib +import pandas as pd + from ...common import labels from .. import dataset_df_helper, split from ..unit_dataset import prep_unit_dataset @@ -32,7 +34,33 @@ def prep_segment_vae_dataset( num_replicates: int | None = None, spect_key: str = "s", timebins_key: str = "t", -): +) -> pd.DataFrame: + """ + + Parameters + ---------- + data_dir + dataset_path + dataset_csv_path + purpose + audio_format + spect_params + annot_format + annot_file + labelset + context_s + train_dur + val_dur + test_dur + train_set_durs + num_replicates + spect_key + timebins_key + + Returns + ------- + + """ dataset_df, shape = prep_unit_dataset( audio_format=audio_format, output_dir=dataset_path, @@ -146,3 +174,5 @@ def prep_segment_vae_dataset( # spect_key, # timebins_key, # ) + + return dataset_df From 388bd394c01327f980dea13f6597fdf3c0f61fff Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:56:10 -0500 Subject: [PATCH 071/150] Fixing up src/vak/prep/vae/window_vae.py --- src/vak/prep/vae/window_vae.py | 33 ++++++++++++++++++++++++++++++++- 1 file changed, 32 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/vae/window_vae.py b/src/vak/prep/vae/window_vae.py 
index 277f37160..6c4fe500b 100644 --- a/src/vak/prep/vae/window_vae.py +++ b/src/vak/prep/vae/window_vae.py @@ -20,10 +20,12 @@ def prep_window_vae_dataset( dataset_csv_path: str | pathlib.Path, purpose: str, audio_format: str | None = None, + spect_format: str | None = None, spect_params: dict | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, + audio_dask_bag_kwargs: dict | None = None, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -31,7 +33,34 @@ def prep_window_vae_dataset( num_replicates: int | None = None, spect_key: str = "s", timebins_key: str = "t", -): +) -> pd.DataFrame: + """ + + Parameters + ---------- + data_dir + dataset_path + dataset_csv_path + purpose + audio_format + spect_format + spect_params + annot_format + annot_file + labelset + audio_dask_bag_kwargs + train_dur + val_dur + test_dur + train_set_durs + num_replicates + spect_key + timebins_key + + Returns + ------- + + """ source_files_df = prep_spectrogram_dataset( data_dir, annot_format, @@ -109,3 +138,5 @@ def prep_window_vae_dataset( dataset_df.to_csv( dataset_csv_path, index=False ) # index is False to avoid having "Unnamed: 0" column when loading + + return dataset_df \ No newline at end of file From dd3f4f9e7a3318e3cd8f18c6530e2c36e7d1bd8f Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 2 Jan 2024 20:56:18 -0500 Subject: [PATCH 072/150] Fixing up src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 34 ++++++++++++++++++++++++---------- 1 file changed, 24 insertions(+), 10 deletions(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index 602edec03..c10d3c53e 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -1,24 +1,19 @@ """Prepare datasets for VAE models.""" from __future__ import annotations -import json import logging import pathlib import warnings import crowsetta -import pandas as pd from ... 
import datasets -from ...common import labels from ...common.converters import expanded_user_path, labelset_to_set from ...common.logging import config_logging_for_cli, log_version from ...common.timenow import get_timenow_as_str -from .. import dataset_df_helper, split -from ..unit_dataset import prep_unit_dataset -from ..parametric_umap import dataset_arrays -from ..spectrogram_dataset import prep_spectrogram_dataset -from ..frame_classification.assign_samples_to_splits import assign_samples_to_splits +from .. import dataset_df_helper +from .segment_vae import prep_segment_vae_dataset +from .window_vae import prep_window_vae_dataset logger = logging.getLogger(__name__) @@ -242,7 +237,7 @@ def prep_vae_dataset( # ---- actually make the dataset ----------------------------------------------------------------------------------- if dataset_type == 'segment-vae': - prep_segment_vae_dataset( + dataset_df = prep_segment_vae_dataset( data_dir, dataset_path, dataset_csv_path, @@ -262,7 +257,26 @@ def prep_vae_dataset( timebins_key, ) elif dataset_type == 'window-vae': - + dataset_df = prep_window_vae_dataset( + data_dir, + dataset_path, + dataset_csv_path, + purpose, + audio_format, + spect_format, + spect_params, + annot_format, + annot_file, + labelset, + audio_dask_bag_kwargs, + train_dur, + val_dur, + test_dur, + train_set_durs, + num_replicates, + spect_key, + timebins_key, + ) # ---- save csv file that captures provenance of source data ------------------------------------------------------- logger.info(f"Saving dataset csv file: {dataset_csv_path}") From 21fc4c13bcdf6d67cd583e1c00882bcf895adb68 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 08:52:35 -0500 Subject: [PATCH 073/150] Add `is_valid_shape` to src/vak/datasets/validators.py --- src/vak/datasets/validators.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/vak/datasets/validators.py b/src/vak/datasets/validators.py index f37be171d..237d63771 100644 
--- a/src/vak/datasets/validators.py +++ b/src/vak/datasets/validators.py @@ -27,4 +27,15 @@ def is_valid_spect_format(instance, attribute, value): raise ValueError( f"Not a valid spectrogram format: {value}. " f"Valid spectrogram formats are: {vak.common.constants.VALID_SPECT_FORMATS}" - ) \ No newline at end of file + ) + + +def is_valid_shape(instance, attribute, value): + if not isinstance(value, tuple): + raise TypeError( + f"`shape` should be a tuple but type was: {type(value)}" + ) + if not all([isinstance(val, int) and val > 0 for val in value]): + raise ValueError( + f"All values of `shape` should be positive integers but values were: {value}" + ) From 9bedc6cb3c2c085a310383e3c74740086554187d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 08:52:51 -0500 Subject: [PATCH 074/150] Make shape attribute optional in src/vak/datasets/vae/metadata.py --- src/vak/datasets/vae/metadata.py | 17 +++++------------ 1 file changed, 5 insertions(+), 12 deletions(-) diff --git a/src/vak/datasets/vae/metadata.py b/src/vak/datasets/vae/metadata.py index 9d4aaca1c..add2ce5b4 100644 --- a/src/vak/datasets/vae/metadata.py +++ b/src/vak/datasets/vae/metadata.py @@ -37,18 +37,11 @@ class Metadata: converter=str, validator=validators.is_valid_dataset_csv_filename ) - shape: tuple = attr.field(converter=tuple) - - @shape.validator - def is_valid_shape(self, attribute, value): - if not isinstance(value, tuple): - raise TypeError( - f"`shape` should be a tuple but type was: {type(value)}" - ) - if not all([isinstance(val, int) and val > 0 for val in value]): - raise ValueError( - f"All values of `shape` should be positive integers but values were: {value}" - ) + shape: tuple = attr.field( + converter=attr.converters.optional(tuple), + validator=attr.validators.optional(validators.is_valid_shape), + default=None + ) audio_format: str = attr.field( converter=attr.converters.optional(str), From d190a5fb901262a0cd8c24ae7801e7cd864ed046 Mon Sep 17 00:00:00 2001 From: 
David Nicholson Date: Wed, 3 Jan 2024 08:53:27 -0500 Subject: [PATCH 075/150] Fix how we handle shape metadata in src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index c10d3c53e..f6e3c6efd 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -135,6 +135,7 @@ def prep_vae_dataset( raise ValueError( f"`dataset_type` must be one of '{VAE_DATASET_TYPES}', but was: {dataset_type}" ) + logger.info(f"Type of VAE dataset that will be prepared : {dataset_type}") if labelset is not None: labelset = labelset_to_set(labelset) @@ -166,7 +167,7 @@ def prep_vae_dataset( if labelset is not None: warnings.warn( "The ``purpose`` argument was set to 'predict`, but a ``labelset`` was provided." - "This would cause an error because the ``prep_spectrogram_dataset`` section will attempt to " + "This would cause an error because the ``prep_spectrogram_dataset`` function will attempt to " "check whether the files in the ``data_dir`` have labels in " "``labelset``, even though those files don't have annotation.\n" "Setting ``labelset`` to None." @@ -179,17 +180,17 @@ def prep_vae_dataset( "This will cause an error when trying to split the dataset, " "e.g. into training and test splits, " "or a silent error, e.g. when calculating metrics with an evaluation set. " - "Please specify a ``labelset`` when calling ``vak.prep.frame_classification.prep`` " + "Please specify a ``labelset`` when calling ``vak.prep.vae.prep_vae_dataset`` " f"with ``purpose='{purpose}'." 
) - logger.info(f"Purpose for frame classification dataset: {purpose}") + logger.info(f"Purpose for VAE dataset: {purpose}") # ---- set up directory that will contain dataset, and csv file name ----------------------------------------------- data_dir_name = data_dir.name timenow = get_timenow_as_str() dataset_path = ( output_dir - / f"{data_dir_name}-vak-dimensionality-reduction-dataset-generated-{timenow}" + / f"{data_dir_name}-vak-vae-dataset-generated-{timenow}" ) dataset_path.mkdir() @@ -236,8 +237,9 @@ def prep_vae_dataset( logger.info(f"Will prepare dataset as directory: {dataset_path}") # ---- actually make the dataset ----------------------------------------------------------------------------------- + logger.info(f"Preparing files for '{dataset_type}' dataset") if dataset_type == 'segment-vae': - dataset_df = prep_segment_vae_dataset( + dataset_df, shape = prep_segment_vae_dataset( data_dir, dataset_path, dataset_csv_path, @@ -277,6 +279,7 @@ def prep_vae_dataset( spect_key, timebins_key, ) + shape = None # ---- save csv file that captures provenance of source data ------------------------------------------------------- logger.info(f"Saving dataset csv file: {dataset_csv_path}") From 5785275e5db8a57ddb0e2a81cbecd1b19301c4bd Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 09:04:54 -0500 Subject: [PATCH 076/150] Return shape from src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 350dcaad5..c622cf611 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -34,7 +34,7 @@ def prep_segment_vae_dataset( num_replicates: int | None = None, spect_key: str = "s", timebins_key: str = "t", -) -> pd.DataFrame: +) -> tuple[pd.DataFrame, tuple[int]]: """ Parameters @@ -175,4 +175,4 @@ def prep_segment_vae_dataset( # timebins_key, # ) - return dataset_df + return 
dataset_df, shape From ea2ea199089a97c489b1ed45449ef5124b29a819 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 09:05:25 -0500 Subject: [PATCH 077/150] Remove unused variable in src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index c622cf611..0bdf94429 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -152,8 +152,6 @@ def prep_segment_vae_dataset( # save labelmap in case we need it later with (dataset_path / "labelmap.json").open("w") as fp: json.dump(labelmap, fp) - else: - labelmap = None # ---- make arrays that represent final dataset -------------------------------------------------------------------- dataset_arrays.move_files_into_split_subdirs( From a033f9cd7cd0158af2577d3465a053c7188ca124 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 09:34:05 -0500 Subject: [PATCH 078/150] Fix type annotations / docstring in src/vak/prep/unit_dataset/unit_dataset.py --- src/vak/prep/unit_dataset/unit_dataset.py | 45 +++++++++++++++++------ 1 file changed, 34 insertions(+), 11 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 76a0e29b0..0f959f45b 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -260,32 +260,55 @@ def prep_unit_dataset( audio_format: str, output_dir: str, spect_params: dict, - data_dir: list | None = None, + data_dir: str | pathlib.Path, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, context_s: float = 0.005, -) -> pd.DataFrame: +) -> tuple[pd.DataFrame, tuple[int]]: """Prepare a dataset of units from sequences, e.g., all syllables segmented out of a dataset of birdsong. 
Parameters ---------- - audio_format - output_dir - spect_params - data_dir - annot_format - annot_file - labelset - context_s + audio_format : str + Format of audio files. One of {'wav', 'cbin'}. + Default is ``None``, but either ``audio_format`` or ``spect_format`` + must be specified. + output_dir : str + Path to location where data sets should be saved. + Default is ``None``, in which case it defaults to ``data_dir``. + spect_params : dict, vak.config.SpectParams + Parameters for creating spectrograms. Default is ``None``. + data_dir : str, pathlib.Path + Path to directory with files from which to make dataset. + annot_format : str + Format of annotations. Any format that can be used with the + :mod:`crowsetta` library is valid. Default is ``None``. + annot_file : str + Path to a single annotation file. Default is ``None``. + Used when a single file contains annotates multiple audio + or spectrogram files. + labelset : str, list, set + Set of unique labels for vocalizations. Strings or integers. + Default is ``None``. If not ``None``, then files will be skipped + where the associated annotation + contains labels not found in ``labelset``. + ``labelset`` is converted to a Python ``set`` using + :func:`vak.converters.labelset_to_set`. + See help for that function for details on how to specify ``labelset``. + context_s : float + Number of seconds of "context" around unit to + add, i.e., time before and after the onset + and offset respectively. Default is 0.005s, + 5 milliseconds. Returns ------- unit_df : pandas.DataFrame A DataFrame representing all the units in the dataset. shape: tuple - A tuple representing the shape of all spectograms in the dataset. + A tuple representing the shape of all spectrograms in the dataset. The spectrograms of all units are padded so that they are all as wide as the widest unit (i.e, the one with the longest duration). 
""" From f1756c1eb9dcd919527af5acb40b0182f5a5940f Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 09:36:03 -0500 Subject: [PATCH 079/150] Fix wording of error messages in src/vak/prep/parametric_umap/parametric_umap.py --- src/vak/prep/parametric_umap/parametric_umap.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 9cb1d7cb6..eb8ceb29b 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -225,7 +225,7 @@ def prep_parametric_umap_dataset( if dataset_df.empty: raise ValueError( "Calling `vak.prep.unit_dataset.prep_unit_dataset` " - "with arguments passed to `vak.core.prep.prep_dimensionality_reduction_dataset` " + "with arguments passed to `vak.core.prep.prep_parametric_umap_dataset` " "returned an empty dataframe.\n" "Please double-check arguments to `vak.core.prep` function." ) @@ -242,7 +242,8 @@ def prep_parametric_umap_dataset( and (test_dur is None or val_dur == 0) ): raise ValueError( - "A duration specified for just training set, but prep function does not currently support creating a " + "A duration was specified for just the training set, " + "but prep function does not currently support creating a " "single split of a specified duration. 
Either remove the train_dur option from the prep section and " "rerun, in which case all data will be included in the training set, or specify values greater than " "zero for test_dur (and val_dur, if a validation set will be used)" From ea032327321222675406e0ac9a36647d4ba8545e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 09:36:20 -0500 Subject: [PATCH 080/150] Fix wording of error messages in src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 0bdf94429..5101718d0 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -1,4 +1,4 @@ -"""""" +"""Prepare a dataset of segments for a VAE model.""" from __future__ import annotations import json @@ -35,7 +35,7 @@ def prep_segment_vae_dataset( spect_key: str = "s", timebins_key: str = "t", ) -> tuple[pd.DataFrame, tuple[int]]: - """ + """Prepare a dataset of segments for a VAE model. Parameters ---------- @@ -74,7 +74,7 @@ def prep_segment_vae_dataset( if dataset_df.empty: raise ValueError( "Calling `vak.prep.unit_dataset.prep_unit_dataset` " - "with arguments passed to `vak.core.prep.prep_dimensionality_reduction_dataset` " + "with arguments passed to `vak.core.prep.vae.prep_segment_vae_dataset` " "returned an empty dataframe.\n" "Please double-check arguments to `vak.core.prep` function." ) @@ -91,7 +91,8 @@ def prep_segment_vae_dataset( and (test_dur is None or val_dur == 0) ): raise ValueError( - "A duration specified for just training set, but prep function does not currently support creating a " + "A duration was specified for just the training set, " + "but prep function does not currently support creating a " "single split of a specified duration. 
Either remove the train_dur option from the prep section and " "rerun, in which case all data will be included in the training set, or specify values greater than " "zero for test_dur (and val_dur, if a validation set will be used)" From c3c5eb7b04606b13e081e6ddd3fe46294b5bb6a0 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 10:34:25 -0500 Subject: [PATCH 081/150] Add/use prep_frame_label_vecs parameter in src/vak/prep/frame_classification/make_splits.py --- src/vak/prep/frame_classification/make_splits.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/frame_classification/make_splits.py b/src/vak/prep/frame_classification/make_splits.py index e4fd01564..cd5e1035c 100644 --- a/src/vak/prep/frame_classification/make_splits.py +++ b/src/vak/prep/frame_classification/make_splits.py @@ -127,6 +127,7 @@ def make_splits( spect_key: str = "s", timebins_key: str = "t", freqbins_key: str = "f", + prep_frame_label_vecs=True, ) -> pd.DataFrame: r"""Make each split of a frame classification dataset. @@ -234,6 +235,11 @@ def make_splits( Key for accessing vector of time bins in files. Default is 't'. freqbins_key : str key for accessing vector of frequency bins in files. Default is 'f'. + prep_frame_label_vecs : bool + If True, prepare vectors of labels for each frame. Default is True. + This option is used by + :func:`vak.prep.vae.prep_window_vae_dataset` + since those datasets do not require frame labels. 
Returns ------- @@ -353,7 +359,7 @@ def _save_dataset_arrays_and_return_index_arrays( inds_in_sample_vec = np.arange(n_frames) # add to frame labels - if annot: + if prep_frame_label_vecs and annot: lbls_int = [labelmap[lbl] for lbl in annot.seq.labels] frame_labels = transforms.frame_labels.from_segments( lbls_int, From 6f746733de8a13003e9b0badd9e8a874e89a139a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 10:34:55 -0500 Subject: [PATCH 082/150] Fixing src/vak/prep/vae/window_vae.py --- src/vak/prep/vae/window_vae.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/vae/window_vae.py b/src/vak/prep/vae/window_vae.py index 6c4fe500b..d8713da24 100644 --- a/src/vak/prep/vae/window_vae.py +++ b/src/vak/prep/vae/window_vae.py @@ -7,8 +7,11 @@ import pandas as pd from ...common import labels +from .. import sequence_dataset from ..spectrogram_dataset import prep_spectrogram_dataset from ..frame_classification.assign_samples_to_splits import assign_samples_to_splits +from ..frame_classification.learncurve import make_subsets_from_dataset_df +from ..frame_classification.make_splits import make_splits logger = logging.getLogger(__name__) @@ -22,6 +25,7 @@ def prep_window_vae_dataset( audio_format: str | None = None, spect_format: str | None = None, spect_params: dict | None = None, + spect_output_dir: str | pathlib.Path | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, @@ -33,6 +37,7 @@ def prep_window_vae_dataset( num_replicates: int | None = None, spect_key: str = "s", timebins_key: str = "t", + freqbins_key: str = "f", ) -> pd.DataFrame: """ @@ -45,6 +50,7 @@ def prep_window_vae_dataset( audio_format spect_format spect_params + spect_output_dir annot_format annot_file labelset @@ -113,7 +119,8 @@ def prep_window_vae_dataset( dataset_df: pd.DataFrame = make_splits( dataset_df, dataset_path, - input_type, + # input_type="spect", we only 
make spectrogram datasets for now + "spect", purpose, labelmap, audio_format, @@ -139,4 +146,4 @@ def prep_window_vae_dataset( dataset_csv_path, index=False ) # index is False to avoid having "Unnamed: 0" column when loading - return dataset_df \ No newline at end of file + return dataset_df From 41e08a360645c6a7653be17018be87040ebc0b36 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 10:35:10 -0500 Subject: [PATCH 083/150] Fix how we detect vae dataset type in src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index d6b45190b..dc48841ef 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -233,7 +233,7 @@ def prep( timebins_key=timebins_key, ) return dataset_df, dataset_path - elif dataset_type == "vae": + elif dataset_type in {"vae-segment", "vae-window"}: dataset_df, dataset_path = prep_vae_dataset( data_dir, purpose, From 4ef737f7bdd47e889b876ba6a705c2feef7eb2f7 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 10:35:23 -0500 Subject: [PATCH 084/150] Fix DATASET_TYPES to match what we use elsewhere in src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index f6e3c6efd..375f6a1aa 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -20,7 +20,7 @@ VAE_DATASET_TYPES = { - "segment-vae", "window-vae" + "vae-segment", "vae-window" } @@ -279,6 +279,7 @@ def prep_vae_dataset( spect_key, timebins_key, ) + # only segment-vae dataset has shape -- we set to None for metadata below shape = None # ---- save csv file that captures provenance of source data ------------------------------------------------------- From 232ad22a6145a3ae902c2cd022f80e79a07a92bf Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 11:16:54 -0500 Subject: [PATCH 085/150] Add dataset_type 
attribute to vae.datasets.Metadata, add validator for it, add missing defs in Metadata docstring --- src/vak/datasets/vae/metadata.py | 21 ++++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/src/vak/datasets/vae/metadata.py b/src/vak/datasets/vae/metadata.py index add2ce5b4..f6bdc2be9 100644 --- a/src/vak/datasets/vae/metadata.py +++ b/src/vak/datasets/vae/metadata.py @@ -12,6 +12,14 @@ import attr from .. import validators +from ...prep.vae.vae import VAE_DATASET_TYPES + + +def is_valid_vae_dataset_type(instance, attribute, value): + if value not in VAE_DATASET_TYPES: + raise ValueError( + f"`dataset_type` must be one of '{VAE_DATASET_TYPES}', but was: {value}" + ) @attr.define @@ -26,7 +34,15 @@ class Metadata: Name of csv file representing the source files in the dataset. Csv file will be located in root of directory representing dataset, so only the filename is given. - audio_format + dataset_type : str + One of: {'vae-segment', 'vae-window'} + audio_format : str + Format of audio files. One of {'wav', 'cbin'}. + Default is ``None``, but either ``audio_format`` or ``spect_format`` + must be specified. + shape : tuple, optional + Shape of dataset. + Only used for 'vae-segment' dataset. 
""" # declare this as a constant to avoid @@ -36,6 +52,9 @@ class Metadata: dataset_csv_filename: str = attr.field( converter=str, validator=validators.is_valid_dataset_csv_filename ) + dataset_type: str = attr.field( + converter=str, validator=is_valid_vae_dataset_type + ) shape: tuple = attr.field( converter=attr.converters.optional(tuple), From 6da7c33a1049947f9a0dc6750dafab8b3825cd31 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 11:28:41 -0500 Subject: [PATCH 086/150] Add missing args to prep_vae_dataset in src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index dc48841ef..65e8dacff 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -240,10 +240,12 @@ def prep( dataset_type, output_dir, audio_format, + spect_format, spect_params, annot_format, annot_file, labelset, + audio_dask_bag_kwargs, context_s, train_dur, val_dur, From 6bd41ffd6b2799923605067b246323d66292847a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 11:32:26 -0500 Subject: [PATCH 087/150] Fix how we detect dataset type in src/vak/prep/vae/vae.py, pass dataset_type as arg into Metadata at end --- src/vak/prep/vae/vae.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index 375f6a1aa..c432e5c18 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -238,7 +238,7 @@ def prep_vae_dataset( # ---- actually make the dataset ----------------------------------------------------------------------------------- logger.info(f"Preparing files for '{dataset_type}' dataset") - if dataset_type == 'segment-vae': + if dataset_type == 'vae-segment': dataset_df, shape = prep_segment_vae_dataset( data_dir, dataset_path, @@ -258,7 +258,7 @@ def prep_vae_dataset( spect_key, timebins_key, ) - elif dataset_type == 'window-vae': + elif dataset_type == 'vae-window': dataset_df = 
prep_window_vae_dataset( data_dir, dataset_path, @@ -291,6 +291,7 @@ def prep_vae_dataset( # ---- save metadata ----------------------------------------------------------------------------------------------- metadata = datasets.vae.Metadata( dataset_csv_filename=str(dataset_csv_path.name), + dataset_type=dataset_type, audio_format=audio_format, shape=shape, ) From 520bb4ea57c1d6664d4a32b997b5c8943f606234 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 3 Jan 2024 11:58:37 -0500 Subject: [PATCH 088/150] Remove spect_output_dir parameter from prep_window_ave_dataset, instead just specify audio-dask_bag_kwargs as a keyword argument --- src/vak/prep/vae/window_vae.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/vak/prep/vae/window_vae.py b/src/vak/prep/vae/window_vae.py index d8713da24..90713cd2a 100644 --- a/src/vak/prep/vae/window_vae.py +++ b/src/vak/prep/vae/window_vae.py @@ -25,7 +25,6 @@ def prep_window_vae_dataset( audio_format: str | None = None, spect_format: str | None = None, spect_params: dict | None = None, - spect_output_dir: str | pathlib.Path | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, @@ -75,8 +74,7 @@ def prep_window_vae_dataset( audio_format, spect_format, spect_params, - spect_output_dir, - audio_dask_bag_kwargs, + audio_dask_bag_kwargs=audio_dask_bag_kwargs, ) # save before (possibly) splitting, just in case duration args are not valid From a9decc33ee34015b44ba606f180cb4644ae0c913 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 4 Jan 2024 18:00:42 -0500 Subject: [PATCH 089/150] Make fixes in src/vak/train/vae.py --- src/vak/train/vae.py | 125 ++++++++++++++++++++++++++----------------- 1 file changed, 76 insertions(+), 49 deletions(-) diff --git a/src/vak/train/vae.py b/src/vak/train/vae.py index 375a138c4..a45f07fc4 100644 --- a/src/vak/train/vae.py +++ b/src/vak/train/vae.py @@ -5,7 +5,6 @@ import logging import pathlib 
-import numpy as np import pandas as pd import pytorch_lightning as lightning import torch.utils.data @@ -13,8 +12,7 @@ from .. import datasets, models, transforms from ..common import validators from ..common.device import get_default as get_default_device -from ..common.trainer import get_default_trainer -from ..datasets.vae import SegmentDataset +from ..datasets.vae import SegmentDataset, WindowDataset from .frame_classification import get_split_dur @@ -88,11 +86,9 @@ def train_vae_model( checkpoint_path: str | pathlib.Path | None = None, spect_scaler_path: str | pathlib.Path | None = None, results_path: str | pathlib.Path | None = None, - normalize_spectrograms: bool = True, shuffle: bool = True, val_step: int | None = None, ckpt_step: int | None = None, - patience: int | None = None, device: str | None = None, subset: str | None = None, ) -> None: @@ -128,7 +124,6 @@ def train_vae_model( shuffle val_step ckpt_step - patience device subset @@ -185,72 +180,104 @@ def train_vae_model( f"Total duration of training split from dataset (in s): {train_dur}", ) - train_transform_params = {} - transform = transforms.defaults.get_default_transform( - "ConvEncoderUMAP", "train", train_transform_params - ) - if train_transform_params is None: train_transform_params = {} - train_dataset = SegmentDataset.from_dataset_path( - dataset_path=dataset_path, - split="train", - transform=transform, + transform = transforms.defaults.get_default_transform( + model_name, "train", train_transform_params ) + if metadata.dataset_type == 'vae-segment': + train_dataset = SegmentDataset.from_dataset_path( + dataset_path=dataset_path, + split="train", + subset=subset, + transform=transform, + **train_dataset_params, + ) + elif metadata.dataset_type == 'vae-window': + train_dataset = WindowDataset.from_dataset_path( + dataset_path=dataset_path, + split="train", + subset=subset, + transform=transform, + **train_dataset_params, + ) + train_loader = torch.utils.data.DataLoader( 
dataset=train_dataset, - shuffle=True, - batch_size=64, - num_workers=16, + shuffle=shuffle, + batch_size=batch_size, + num_workers=num_workers, ) # ---------------- load validation set (if there is one) ----------------------------------------------------------- + if val_step: + if val_transform_params is None: + val_transform_params = {} + transform = transforms.defaults.get_default_transform( + model_name, "eval", val_transform_params + ) + if val_dataset_params is None: + val_dataset_params = {} + if metadata.dataset_type == 'vae-segment': + val_dataset = SegmentDataset.from_dataset_path( + dataset_path=dataset_path, + split="val", + transform=transform, + **val_dataset_params, + ) + elif metadata.dataset_type == 'vae-window': + val_dataset = WindowDataset.from_dataset_path( + dataset_path=dataset_path, + split="val", + transform=transform, + **val_dataset_params, + ) + print( + f"Duration of ParametricUMAPDataset used for validation, in seconds: {val_dataset.duration}", + ) + val_loader = torch.utils.data.DataLoader( + dataset=val_dataset, + shuffle=False, + batch_size=64, + num_workers=16, + ) - val_transform_params = {} - transform = vak.transforms.defaults.get_default_transform( - "ConvEncoderUMAP", "eval", val_transform_params - ) - val_dataset_params = {} - val_dataset = SpectrogramPipe.from_dataset_path( - dataset_path=dataset_path, - split="val", - transform=transform, - **val_dataset_params, - ) - print( - f"Duration of ParametricUMAPDataset used for validation, in seconds: {val_dataset.duration}", - ) - val_loader = torch.utils.data.DataLoader( - dataset=val_dataset, - shuffle=False, - batch_size=64, - num_workers=16, - ) - - device = vak.common.device.get_default() + if device is None: + device = get_default_device() - model = vak.models.get( - "AVA", - config={"network": {}, "optimizer": {"lr": 0.001}}, + model = models.get( + model_name, + config=model_config, input_shape=train_dataset.shape, ) - results_model_root = results_path.joinpath("AVA") - 
results_model_root.mkdir(exist_ok=True) + if checkpoint_path is not None: + logger.info( + f"loading checkpoint for {model_name} from path: {checkpoint_path}", + ) + model.load_state_dict_from_path(checkpoint_path) + + results_model_root = results_path.joinpath(model_name) + results_model_root.mkdir() ckpt_root = results_model_root.joinpath("checkpoints") ckpt_root.mkdir(exist_ok=True) - + logger.info(f"training {model_name}") trainer = get_trainer( - max_epochs=50, + max_epochs=num_epochs, log_save_dir=results_model_root, device=device, ckpt_root=ckpt_root, - ckpt_step=250, + ckpt_step=ckpt_step, ) - + train_time_start = datetime.datetime.now() + logger.info(f"Training start time: {train_time_start.isoformat()}") trainer.fit( model=model, train_dataloaders=train_loader, val_dataloaders=val_loader, ) + train_time_stop = datetime.datetime.now() + logger.info(f"Training stop time: {train_time_stop.isoformat()}") + elapsed = train_time_stop - train_time_start + logger.info(f"Elapsed training time: {elapsed}") From 9ca302342e0eba6c21122aa0fff2fb88b8c370d4 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 4 Jan 2024 18:00:59 -0500 Subject: [PATCH 090/150] Have src/vak/train/train_.py call train_vae_model as appropriate --- src/vak/train/train_.py | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) diff --git a/src/vak/train/train_.py b/src/vak/train/train_.py index 79ee2897f..ecae272aa 100644 --- a/src/vak/train/train_.py +++ b/src/vak/train/train_.py @@ -8,6 +8,8 @@ from ..common import validators from .frame_classification import train_frame_classification_model from .parametric_umap import train_parametric_umap_model +from .vae import train_vae_model + logger = logging.getLogger(__name__) @@ -207,5 +209,25 @@ def train( device=device, subset=subset, ) + elif model_family == "VAEModel": + train_vae_model( + model_name=model_name, + model_config=model_config, + dataset_path=dataset_path, + batch_size=batch_size, + num_epochs=num_epochs, + 
num_workers=num_workers, + train_transform_params=train_transform_params, + train_dataset_params=train_dataset_params, + val_transform_params=val_transform_params, + val_dataset_params=val_dataset_params, + checkpoint_path=checkpoint_path, + results_path=results_path, + shuffle=shuffle, + val_step=val_step, + ckpt_step=ckpt_step, + device=device, + subset=subset, + ) else: raise ValueError(f"Model family not recognized: {model_family}") From e4e34fa8f5d1aab4ba20325d8cb17186cdcf979e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 4 Jan 2024 18:18:18 -0500 Subject: [PATCH 091/150] Add 'subset' parameter to SegmentDataset, revise docstring --- src/vak/datasets/vae/segment_dataset.py | 49 +++++++++++++++++++++---- 1 file changed, 42 insertions(+), 7 deletions(-) diff --git a/src/vak/datasets/vae/segment_dataset.py b/src/vak/datasets/vae/segment_dataset.py index 7e8d36ad1..d334bd1a0 100644 --- a/src/vak/datasets/vae/segment_dataset.py +++ b/src/vak/datasets/vae/segment_dataset.py @@ -1,3 +1,8 @@ +"""Dataset class for VAE models that operate on segments. + +Segments are typically found with a segmenting algorithm +that thresholds audio signal energy, +e.g., syllables from birdsong or mouse USVs.""" from __future__ import annotations import pathlib @@ -10,11 +15,11 @@ class SegmentDataset(torch.utils.data.Dataset): - """Pipeline for loading samples from a dataset of spectrograms + """Dataset class for VAE models that operate on segments. - This is a simplified version of - :class:`vak.datasets.parametric_umap.ParametricUmapInferenceDataset`. - """ + Segments are typically found with a segmenting algorithm + that thresholds audio signal energy, + e.g., syllables from birdsong or mouse USVs.""" def __init__( self, @@ -51,8 +56,34 @@ def from_dataset_path( cls, dataset_path: str | pathlib.Path, split: str, + subset: str | None = None, transform: Callable | None = None, ): + """Make a :class:`SegmentDataset` instance, + given the path to a VAE segment dataset. 
+ + Parameters + ---------- + dataset_path : pathlib.Path + Path to directory that represents a + frame classification dataset, + as created by + :func:`vak.prep.prep_frame_classification_dataset`. + split : str + The name of a split from the dataset, + one of {'train', 'val', 'test'}. + subset : str, optional + Name of subset to use. + If specified, this takes precedence over split. + Subsets are typically taken from the training data + for use when generating a learning curve. + transform : callable + The transform applied to the input to the neural network :math:`x`. + + Returns + ------- + dataset : vak.datasets.vae.SegmentDataset + """ import vak.datasets # import here just to make classmethod more explicit dataset_path = pathlib.Path(dataset_path) @@ -62,16 +93,20 @@ def from_dataset_path( dataset_csv_path = dataset_path / metadata.dataset_csv_filename dataset_df = pd.read_csv(dataset_csv_path) - split_df = dataset_df[dataset_df.split == split] + # subset takes precedence over split, if specified + if subset: + dataset_df = dataset_df[dataset_df.subset == subset].copy() + else: + dataset_df = dataset_df[dataset_df.split == split].copy() data = np.stack( [ np.load(dataset_path / spect_path) - for spect_path in split_df.spect_path.values + for spect_path in dataset_df.spect_path.values ] ) return cls( data, - split_df, + dataset_df, transform=transform, ) From 761acf88f5cb3c8591325f1b134bd9108b682130 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 4 Jan 2024 18:18:30 -0500 Subject: [PATCH 092/150] Revise docstrings in src/vak/datasets/vae/window_dataset.py --- src/vak/datasets/vae/window_dataset.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/vak/datasets/vae/window_dataset.py b/src/vak/datasets/vae/window_dataset.py index ad2add327..34e55293d 100644 --- a/src/vak/datasets/vae/window_dataset.py +++ b/src/vak/datasets/vae/window_dataset.py @@ -1,4 +1,5 @@ -"""Dataset class used for training VAE models on fixed-sized 
windows, such as a "shotgun VAE" [1]_. +"""Dataset class used for VAE models that operate on fixed-sized windows, +such as a "shotgun VAE" [1]_. .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. @@ -17,7 +18,8 @@ class WindowDataset: - """Dataset class used for training VAE models on fixed-sized windows, such as a "shotgun VAE" [1]_. + """Dataset class used for VAE models that operate on fixed-sized windows, + such as a "shotgun VAE" [1]_. Attributes ---------- From 4c5fde932c62c6a25e032f041dd173386c866f04 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 4 Jan 2024 18:18:41 -0500 Subject: [PATCH 093/150] Fix up src/vak/train/vae.py --- src/vak/train/vae.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/train/vae.py b/src/vak/train/vae.py index a45f07fc4..1be7efb63 100644 --- a/src/vak/train/vae.py +++ b/src/vak/train/vae.py @@ -239,8 +239,8 @@ def train_vae_model( val_loader = torch.utils.data.DataLoader( dataset=val_dataset, shuffle=False, - batch_size=64, - num_workers=16, + batch_size=batch_size, + num_workers=num_workers, ) if device is None: From f0bcc438a04d7ccae38f4b821e063c6f1070c25f Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 15:34:39 -0500 Subject: [PATCH 094/150] Fix Metadata class used in src/vak/datasets/vae/segment_dataset.py --- src/vak/datasets/vae/segment_dataset.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/datasets/vae/segment_dataset.py b/src/vak/datasets/vae/segment_dataset.py index d334bd1a0..687988b94 100644 --- a/src/vak/datasets/vae/segment_dataset.py +++ b/src/vak/datasets/vae/segment_dataset.py @@ -87,7 +87,7 @@ def from_dataset_path( import vak.datasets # import here just to make classmethod more explicit dataset_path = pathlib.Path(dataset_path) - metadata = vak.datasets.parametric_umap.Metadata.from_dataset_path( + 
metadata = vak.datasets.vae.Metadata.from_dataset_path( dataset_path ) From 521462503ee3302d81a828413b08eb8e6ff09f1d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 15:34:46 -0500 Subject: [PATCH 095/150] Fix up src/vak/train/vae.py --- src/vak/train/vae.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/vak/train/vae.py b/src/vak/train/vae.py index 1be7efb63..bfcf7b170 100644 --- a/src/vak/train/vae.py +++ b/src/vak/train/vae.py @@ -150,7 +150,7 @@ def train_vae_model( logger.info( f"Loading dataset from path: {dataset_path}", ) - metadata = datasets.parametric_umap.Metadata.from_dataset_path( + metadata = datasets.vae.Metadata.from_dataset_path( dataset_path ) dataset_csv_path = dataset_path / metadata.dataset_csv_filename @@ -186,6 +186,8 @@ def train_vae_model( model_name, "train", train_transform_params ) + if train_dataset_params is None: + train_dataset_params = {} if metadata.dataset_type == 'vae-segment': train_dataset = SegmentDataset.from_dataset_path( dataset_path=dataset_path, @@ -234,7 +236,7 @@ def train_vae_model( **val_dataset_params, ) print( - f"Duration of ParametricUMAPDataset used for validation, in seconds: {val_dataset.duration}", + f"Duration of dataset used for validation, in seconds: {val_dataset.duration}", ) val_loader = torch.utils.data.DataLoader( dataset=val_dataset, @@ -262,7 +264,7 @@ def train_vae_model( results_model_root.mkdir() ckpt_root = results_model_root.joinpath("checkpoints") ckpt_root.mkdir(exist_ok=True) - logger.info(f"training {model_name}") + logger.info(f"Training model: {model_name}") trainer = get_trainer( max_epochs=num_epochs, log_save_dir=results_model_root, From e5dd7121b1506113bef842b06d537a44f8b8d3ac Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 20:44:10 -0500 Subject: [PATCH 096/150] Add type annotations in src/vak/prep/spectrogram_dataset/spect.py --- src/vak/prep/spectrogram_dataset/spect.py | 19 +++++++++++-------- 1 file changed, 
11 insertions(+), 8 deletions(-) diff --git a/src/vak/prep/spectrogram_dataset/spect.py b/src/vak/prep/spectrogram_dataset/spect.py index d4d84ada0..602da575a 100644 --- a/src/vak/prep/spectrogram_dataset/spect.py +++ b/src/vak/prep/spectrogram_dataset/spect.py @@ -5,7 +5,10 @@ spectrogram adapted from code by Kyle Kastner and Tim Sainburg https://github.com/timsainb/python_spectrograms_and_inversion """ +from __future__ import annotations + import numpy as np +import numpy.typing as npt from matplotlib.mlab import specgram from scipy.signal import butter, lfilter @@ -25,14 +28,14 @@ def butter_bandpass_filter(data, lowcut, highcut, fs, order=5): def spectrogram( - dat, - samp_freq, - fft_size=512, - step_size=64, - thresh=None, - transform_type=None, - freq_cutoffs=None, -): + dat: npt.NDArray, + samp_freq: int, + fft_size: int = 512, + step_size: int = 64, + thresh: float | None = None, + transform_type: str | None = None, + freq_cutoffs: list[int, int] | None = None, +) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]: """creates a spectrogram Parameters From 5b9637d9d1fecca5790f324640aa9b3a3f1779d7 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 20:44:28 -0500 Subject: [PATCH 097/150] WIP: Add max_dur and target_shape parameters in src/vak/prep/unit_dataset/unit_dataset.py --- src/vak/prep/unit_dataset/unit_dataset.py | 105 +++++++++++++++++++--- 1 file changed, 93 insertions(+), 12 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 0f959f45b..f7d55dfc3 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -12,11 +12,13 @@ import dask.delayed import numpy as np import numpy.typing as npt +import scipy.interpolate import pandas as pd from dask.diagnostics import ProgressBar from ...common import annotation, constants from ...common.converters import expanded_user_path, labelset_to_set +from ...config.spect_params import 
SpectParamsConfig from ..spectrogram_dataset.audio_helper import files_from_dir from ..spectrogram_dataset.spect import spectrogram @@ -53,14 +55,12 @@ def get_segment_list( annot: crowsetta.Annotation, audio_format: str, context_s: float = 0.005, + max_dur: float | None = None ) -> list[Segment]: """Get a list of :class:`Segment` instances, given the path to an audio file and an annotation that indicates where segments occur in that audio file. - Function used by - :func:`vak.prep.dimensionality_reduction.unit_dataset.prep_unit_dataset`. - Parameters ---------- audio_path : str @@ -74,11 +74,23 @@ def get_segment_list( add, i.e., time before and after the onset and offset respectively. Default is 0.005s, 5 milliseconds. + max_dur : float + Maximum duration for segments. + If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the returned list of segments. + Default is None. Returns ------- segments : list A :class:`list` of :class:`Segment` instances. + + Notes + ----- + Function used by + :func:`vak.prep.unit_dataset.prep_unit_dataset`. """ data, samplerate = constants.AUDIO_FORMAT_FUNC_MAP[audio_format]( audio_path @@ -86,9 +98,20 @@ def get_segment_list( sample_dur = 1.0 / samplerate segments = [] - for onset_s, offset_s, label in zip( + for segment_num, (onset_s, offset_s, label) in enumerate(zip( annot.seq.onsets_s, annot.seq.offsets_s, annot.seq.labels - ): + )): + if max_dur is not None: + segment_dur = offset_s - onset_s + if segment_dur > max_dur: + logger.info( + f"Segment {segment_num} in {pathlib.Path(audio_path).name}, " + f"with onset at {onset_s}s and offset at {offset_s}s with label '{label}'," + f"has duration ({segment_dur}) that is greater than " + f"maximum allowed duration ({max_dur})." + "Omitting segment from dataset." 
+ ) + continue onset_s -= context_s offset_s += context_s onset_ind = int(np.floor(onset_s * samplerate)) @@ -112,21 +135,43 @@ def get_segment_list( def spectrogram_from_segment( - segment: Segment, spect_params: dict + segment: Segment, + spect_params: SpectParamsConfig, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None ) -> npt.NDArray: """Compute a spectrogram given a :class:`Segment` instance. Parameters ---------- segment : Segment - spect_params : dict + spect_params : SpectParamsConfig + max_dur : float + Maximum duration for segments. + Used with ``target_shape`` when reshaping + the spectrogram via interpolation. + Default is None. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. Returns ------- spect : numpy.ndarray + + Notes + ----- + Function used by + :func:`vak.prep.unit_dataset.prep_unit_dataset`. 
""" data, samplerate = np.array(segment.data), segment.samplerate - s, _, _ = spectrogram( + s, f, t = spectrogram( data, samplerate, spect_params.fft_size, @@ -135,6 +180,15 @@ def spectrogram_from_segment( spect_params.transform_type, spect_params.freq_cutoffs, ) + if max_dur and target_shape: + # if max_dur and target_shape are specified we interpolate spectrogram to target shape, like AVA + interp = scipy.interpolate.interp2d(t, f, s, copy=False, bounds_error=False, fill_value=-1 / 1e12) + target_freqs = np.linspace(f.min(), f.max(), target_shape[0]) + duration = t.max() - t.min() + new_duration = np.sqrt(duration * max_dur) # stretched duration + shoulder = 0.5 * (max_dur - new_duration) + target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) + s = interp(target_times, target_freqs, assume_sorted=True) return s @@ -190,14 +244,24 @@ def abspath(a_path): # ---- make spectrograms + records for dataframe ----------------------------------------------------------------------- @dask.delayed def make_spect_return_record( - segment: Segment, ind: int, spect_params: dict, output_dir: pathlib.Path + segment: Segment, + ind: int, + spect_params: SpectParamsConfig, + output_dir: pathlib.Path, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None, ) -> tuple: """Helper function that enables parallelized creation of "records", i.e. rows for dataframe, from . 
Accepts a two-element tuple containing (1) a dictionary that represents a spectrogram and (2) annotation for that file""" - spect = spectrogram_from_segment(segment, spect_params) + spect = spectrogram_from_segment( + segment, + spect_params, + max_dur, + target_shape, + ) n_timebins = spect.shape[-1] spect_to_save = SpectToSave(spect, ind, segment.audio_path) @@ -265,6 +329,8 @@ def prep_unit_dataset( annot_file: str | pathlib.Path | None = None, labelset: set | None = None, context_s: float = 0.005, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None, ) -> tuple[pd.DataFrame, tuple[int]]: """Prepare a dataset of units from sequences, e.g., all syllables segmented out of a dataset of birdsong. @@ -302,6 +368,21 @@ def prep_unit_dataset( add, i.e., time before and after the onset and offset respectively. Default is 0.005s, 5 milliseconds. + max_dur : float + Maximum duration for segments. + If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the dataset. Default is None. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. 
Returns ------- @@ -379,7 +460,7 @@ def prep_unit_dataset( segments = [] for audio_path, annot in audio_annot_map.items(): segment_list = dask.delayed(get_segment_list)( - audio_path, annot, audio_format, context_s + audio_path, annot, audio_format, context_s, max_dur ) segments.append(segment_list) @@ -400,7 +481,7 @@ def prep_unit_dataset( records_n_timebins_tuples = [] for ind, segment in enumerate(segments): records_n_timebins_tuple = make_spect_return_record( - segment, ind, spect_params, output_dir + segment, ind, spect_params, output_dir, max_dur, target_shape, ) records_n_timebins_tuples.append(records_n_timebins_tuple) with ProgressBar(): From bde3b2f2c2238bd26ba51f1038f46b073d754709 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 20:48:00 -0500 Subject: [PATCH 098/150] WIP: Add max_dur and target_shape parameters to src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 100 +++++++++++++++++++++++++------- 1 file changed, 78 insertions(+), 22 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 5101718d0..a45f41846 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -27,6 +27,8 @@ def prep_segment_vae_dataset( annot_file: str | pathlib.Path | None = None, labelset: set | None = None, context_s: float = 0.015, + max_dur: float | None = 0.2, + target_shape: tuple[int, int] | None = (128, 128), train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -39,37 +41,91 @@ def prep_segment_vae_dataset( Parameters ---------- - data_dir + data_dir : str, Path + Path to directory with files from which to make dataset. dataset_path dataset_csv_path - purpose - audio_format - spect_params - annot_format + purpose : str + Purpose of the dataset. + One of {'train', 'eval', 'predict', 'learncurve'}. + These correspond to commands of the vak command-line interface. + audio_format : str + Format of audio files. 
One of {'wav', 'cbin'}. + Default is ``None``, but either ``audio_format`` or ``spect_format`` + must be specified. + spect_params : dict, vak.config.SpectParams + Parameters for creating spectrograms. Default is ``None``. + annot_format : str + Format of annotations. Any format that can be used with the + :module:`crowsetta` library is valid. Default is ``None``. annot_file - labelset - context_s - train_dur - val_dur - test_dur - train_set_durs - num_replicates - spect_key - timebins_key + labelset : str, list, set + Set of unique labels for vocalizations. Strings or integers. + Default is ``None``. If not ``None``, then files will be skipped + where the associated annotation + contains labels not found in ``labelset``. + ``labelset`` is converted to a Python ``set`` using + :func:`vak.converters.labelset_to_set`. + See help for that function for details on how to specify ``labelset``. + context_s : float + Number of seconds of "context" around unit to + add, i.e., time before and after the onset + and offset respectively. Default is 0.005s, + 5 milliseconds. + max_dur : float + Maximum duration for segments. + If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the dataset. Default is None. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. + train_dur : float + Total duration of training set, in seconds. + When creating a learning curve, + training subsets of shorter duration + will be drawn from this set. Default is None. + val_dur : float + Total duration of validation set, in seconds. + Default is None. + test_dur : float + Total duration of test set, in seconds. + Default is None. 
+ train_set_durs : list + of int, durations in seconds of subsets taken from training data + to create a learning curve, e.g. [5, 10, 15, 20]. + num_replicates : int + number of times to replicate training for each training set duration + to better estimate metrics for a training set of that size. + Each replicate uses a different randomly drawn subset of the training + data (but of the same duration). + spect_key : str + key for accessing spectrogram in files. Default is 's'. + timebins_key : str + key for accessing vector of time bins in files. Default is 't'. Returns ------- """ dataset_df, shape = prep_unit_dataset( - audio_format=audio_format, - output_dir=dataset_path, - spect_params=spect_params, - data_dir=data_dir, - annot_format=annot_format, - annot_file=annot_file, - labelset=labelset, - context_s=context_s, + audio_format, + dataset_path, + spect_params, + data_dir, + annot_format, + annot_file, + labelset, + context_s, + max_dur, + target_shape, ) if dataset_df.empty: raise ValueError( From 148157e082ccd9274695ba3eac80a24573b63a4e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Sun, 7 Jan 2024 21:45:12 -0500 Subject: [PATCH 099/150] Fix how we interpolate to use replacement for interp2d --- src/vak/prep/unit_dataset/unit_dataset.py | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index f7d55dfc3..6cde331f3 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -12,7 +12,7 @@ import dask.delayed import numpy as np import numpy.typing as npt -import scipy.interpolate +from scipy.interpolate import RegularGridInterpolator import pandas as pd from dask.diagnostics import ProgressBar @@ -182,13 +182,14 @@ def spectrogram_from_segment( ) if max_dur and target_shape: # if max_dur and target_shape are specified we interpolate spectrogram to target shape, like AVA - interp = 
scipy.interpolate.interp2d(t, f, s, copy=False, bounds_error=False, fill_value=-1 / 1e12) target_freqs = np.linspace(f.min(), f.max(), target_shape[0]) duration = t.max() - t.min() new_duration = np.sqrt(duration * max_dur) # stretched duration shoulder = 0.5 * (max_dur - new_duration) target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) - s = interp(target_times, target_freqs, assume_sorted=True) + ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) + r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=-1 / 1e12) + s = r((ttnew, ffnew)).T return s From fae14cf4c9248fa00e829ba7b0f93804b80ab42d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 8 Jan 2024 22:09:17 -0500 Subject: [PATCH 100/150] Set defaults for max_dur and target_shape to None in prep_segment_vae_dataset signature, fix context_s default to 0.005 --- src/vak/prep/vae/segment_vae.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index a45f41846..7efa006d0 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -26,9 +26,9 @@ def prep_segment_vae_dataset( annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, - context_s: float = 0.015, - max_dur: float | None = 0.2, - target_shape: tuple[int, int] | None = (128, 128), + context_s: float = 0.005, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, From 786115987e679bf9fbda400e5564d4879419fb30 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 8 Jan 2024 22:09:41 -0500 Subject: [PATCH 101/150] Add max_dur and target_shape parameters to prep_vae_dataset --- src/vak/prep/vae/vae.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git 
a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index c432e5c18..c43065e6e 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -37,6 +37,8 @@ def prep_vae_dataset( labelset: set | None = None, audio_dask_bag_kwargs: dict | None = None, context_s: float = 0.015, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -91,6 +93,31 @@ def prep_vae_dataset( e.g., ``audio_dask_bag_kwargs = { npartitions = 20 }``. Allows for finer-grained control when needed to process files of different sizes. + context_s : float + Number of seconds of "context" around a segment to + add, i.e., time before and after the onset + and offset respectively. Default is 0.005s, + 5 milliseconds. This parameter is only used for + Parametric UMAP and segment-VAE datasets. + max_dur : float + Maximum duration for segments. + If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the dataset. Default is None. + This parameter is only used for + segment-VAE datasets. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. + This parameter is only used for + segment-VAE datasets. train_dur : float Total duration of training set, in seconds. 
When creating a learning curve, @@ -250,6 +277,8 @@ def prep_vae_dataset( annot_file, labelset, context_s, + max_dur, + target_shape, train_dur, val_dur, test_dur, From bedc21150212496b4c2fad5a473fa1e3db4b450c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 8 Jan 2024 22:09:59 -0500 Subject: [PATCH 102/150] Add max_dur and target_shape parameters to prep_, pass to prep_vae_dataset --- src/vak/prep/prep_.py | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index 65e8dacff..1e0e4a06a 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -32,6 +32,8 @@ def prep( spect_key: str = "s", timebins_key: str = "t", context_s: float = 0.015, + max_dur: float | None = None, + target_shape: tuple[int, int] | None = None, ): """Prepare datasets for use with neural network models. @@ -144,6 +146,31 @@ def prep( key for accessing spectrogram in files. Default is 's'. timebins_key : str key for accessing vector of time bins in files. Default is 't'. + context_s : float + Number of seconds of "context" around a segment to + add, i.e., time before and after the onset + and offset respectively. Default is 0.005s, + 5 milliseconds. This parameter is only used for + Parametric UMAP and segment-VAE datasets. + max_dur : float + Maximum duration for segments. + If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the dataset. Default is None. + This parameter is only used for + vae-segment datasets. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. + This parameter is only used for + vae-segment datasets. 
Returns ------- @@ -247,6 +274,8 @@ def prep( labelset, audio_dask_bag_kwargs, context_s, + max_dur, + target_shape, train_dur, val_dur, test_dur, From 74bf7fe8941bb56ee0657b560947586d10270d5b Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 8 Jan 2024 22:10:16 -0500 Subject: [PATCH 103/150] Add context_s, max_dur, and target_shape attributes to PrepConfig --- src/vak/config/prep.py | 56 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) diff --git a/src/vak/config/prep.py b/src/vak/config/prep.py index 7481d8cc2..6b432542d 100644 --- a/src/vak/config/prep.py +++ b/src/vak/config/prep.py @@ -60,6 +60,25 @@ def are_valid_dask_bag_kwargs(instance, attribute, value): ) +def is_valid_target_shape(instance, attribute, value): + """validator for target shape""" + if not isinstance(value, (tuple, list)): + raise TypeError( + f"invalid type for {attribute} of {instance}: {type(value)}. Type should be tuple or list." + ) + + if not all([isinstance(val, int) for val in value]): + raise ValueError( + f"All values in {attribute} of {instance} should be integers" + ) + + if not len(value) == 2: + raise ValueError( + f"{attribute} of {instance} should have length 2: " + f"(number of frequency bins, number of time bins). " + f"Length was: {len(value)}" + ) + @attr.s class PrepConfig: """class to represent [PREP] section of config.toml file @@ -125,6 +144,31 @@ class PrepConfig: in a learning curve. Each replicate uses a different randomly drawn subset of the training data (but of the same duration). Default is None. Required if config file has a learncurve section. + context_s : float + Number of seconds of "context" around a segment to + add, i.e., time before and after the onset + and offset respectively. Default is 0.005s, + 5 milliseconds. This parameter is only used for + Parametric UMAP and segment-VAE datasets. + max_dur : float + Maximum duration for segments. 
+ If a float value is specified, + any segment with a duration larger than + that value (in seconds) will be omitted + from the dataset. Default is None. + This parameter is only used for + vae-segment datasets. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. + The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. + This parameter is only used for + vae-segment datasets. """ data_dir = attr.ib(converter=expanded_user_path) @@ -195,6 +239,18 @@ def is_valid_input_type(self, attribute, value): validator=validators.optional(instance_of(int)), default=None ) + context_s = attr.ib( + validator=validators.optional(instance_of(float)), default=None + ) + max_dur = attr.ib( + validator=validators.optional(instance_of(float)), default=None + ) + target_shape = attr.ib( + converter=converters.optional(tuple), + validator=validators.optional(is_valid_target_shape), + default=None + ) + def __attrs_post_init__(self): if self.audio_format is not None and self.spect_format is not None: raise ValueError("cannot specify audio_format and spect_format") From 6d749d9d8a76cb868526b939f207d62c442402d9 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Mon, 8 Jan 2024 22:19:41 -0500 Subject: [PATCH 104/150] Add context_s, max_dur, and target_shape options to PREP section in config/valid.toml --- src/vak/config/valid.toml | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vak/config/valid.toml b/src/vak/config/valid.toml index 11cd535f5..0334c264a 100644 --- a/src/vak/config/valid.toml +++ b/src/vak/config/valid.toml @@ -21,6 +21,9 @@ val_dur = 15 test_dur = 30 train_set_durs = [ 4.5, 6.0 ] num_replicates = 2 +context_s = 0.005 +max_dur = 0.2 +target_shape = [128, 128] [SPECT_PARAMS] fft_size = 512 From 86d5f00f63153ee6075f4ab198727d291efd2dd0 Mon Sep 17 
00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 09:32:45 -0500 Subject: [PATCH 105/150] Fix cli/prep so we pass new options into vak.prep --- src/vak/cli/prep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vak/cli/prep.py b/src/vak/cli/prep.py index 3a8ee6b8d..92e36104b 100644 --- a/src/vak/cli/prep.py +++ b/src/vak/cli/prep.py @@ -139,6 +139,9 @@ def prep(toml_path): test_dur=cfg.prep.test_dur, train_set_durs=cfg.prep.train_set_durs, num_replicates=cfg.prep.num_replicates, + context_s=cfg.prep.context_s, + max_dur=cfg.prep.max_dur, + target_shape=cfg.prep.target_shape, ) # use config and section from above to add dataset_path to config.toml file From 9ccfabd375fc98ff84842823b92ba6a8043284ad Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 09:33:02 -0500 Subject: [PATCH 106/150] Modify default for PrepConfig.context_s to be 0.005, not None (caused an error) --- src/vak/config/prep.py | 24 ++++++++++++++++++------ 1 file changed, 18 insertions(+), 6 deletions(-) diff --git a/src/vak/config/prep.py b/src/vak/config/prep.py index 6b432542d..4281981d1 100644 --- a/src/vak/config/prep.py +++ b/src/vak/config/prep.py @@ -25,7 +25,7 @@ def is_valid_duration(instance, attribute, value): """validator for dataset split durations""" if type(value) not in {int, float}: raise TypeError( - f"invalid type for {attribute} of {instance}: {type(value)}. Type should be float or int." + f"invalid type for {attribute.name} of {instance}: {type(value)}. Type should be float or int." 
) if value == -1: # specifies "use the remainder of the dataset" @@ -34,7 +34,7 @@ def is_valid_duration(instance, attribute, value): if not value >= 0: raise ValueError( - f"value specified for {attribute} of {instance} must be greater than or equal to zero, was {value}" + f"value specified for {attribute.name} of {instance} must be greater than or equal to zero, was {value}" ) @@ -64,17 +64,17 @@ def is_valid_target_shape(instance, attribute, value): """validator for target shape""" if not isinstance(value, (tuple, list)): raise TypeError( - f"invalid type for {attribute} of {instance}: {type(value)}. Type should be tuple or list." + f"invalid type for {attribute.name} of {instance}: {type(value)}. Type should be tuple or list." ) if not all([isinstance(val, int) for val in value]): raise ValueError( - f"All values in {attribute} of {instance} should be integers" + f"All values in {attribute.name} of {instance} should be integers" ) if not len(value) == 2: raise ValueError( - f"{attribute} of {instance} should have length 2: " + f"{attribute.name} of {instance} should have length 2: " f"(number of frequency bins, number of time bins). 
" f"Length was: {len(value)}" ) @@ -240,8 +240,20 @@ def is_valid_input_type(self, attribute, value): ) context_s = attr.ib( - validator=validators.optional(instance_of(float)), default=None + default=0.005 ) + @context_s.validator + def is_valid_context_s(self, attribute, value): + if not isinstance(value, float): + raise TypeError( + f"Value for {attribute.name} should be float but type was: {type(value)}" + ) + if not value >= 0.: + raise ValueError( + f"Value for {attribute.name} should be greater than or equal to 0., " + f"but was: {value}" + ) + max_dur = attr.ib( validator=validators.optional(instance_of(float)), default=None ) From c425b215751944b5d1b723337cc032dd8e9c3d6b Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 20:49:34 -0500 Subject: [PATCH 107/150] Remove return_latent_rec parameter from vae_elbo_loss, make functional verison of loss return negative value instead of returning different value from forward method of class. --- src/vak/nn/loss/vae.py | 35 +++++++++++++++++++---------------- 1 file changed, 19 insertions(+), 16 deletions(-) diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index af533aa79..e2c588338 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -1,10 +1,12 @@ from __future__ import annotations -import warnings import math -import torch + import numpy as np +import torch + +PI = torch.tensor(math.pi) def vae_elbo_loss( x: torch.Tensor, @@ -14,29 +16,30 @@ def vae_elbo_loss( model_precision: float, z_dim: int ): - pi = torch.tensor(math.pi) - x_dim = x.shape - elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * torch.log( 2 * pi )) + # E_{q(z|x)} p(z) + elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * torch.log( 2 * PI )) + # E_{q(z|x)} p(x|z) - pxz_term = -0.5 * x_dim * (torch.log(2 * pi / model_precision)) - l2s = torch.sum( torch.pow( x.view( x.shape[0], -1 ) - x_rec, 2), dim=1) + x_dim = np.prod(x.shape[1:]) + pxz_term = -0.5 * x_dim * (torch.log(2 * PI / 
model_precision)) + l2s = torch.sum(torch.pow(x - x_rec, 2), dim=1) pxz_term = pxz_term - 0.5 * model_precision * torch.sum(l2s) elbo = elbo + pxz_term + # H[q(z|x)] elbo = elbo + torch.sum(latent_dist.entropy()) - return elbo + return -elbo + class VaeElboLoss(torch.nn.Module): """""" def __init__( self, - return_latent_rec: bool = False, model_precision: float = 10.0, z_dim: int = 32 ): super().__init__() - self.return_latent_rec = return_latent_rec self.model_precision = model_precision self.z_dim = z_dim @@ -47,9 +50,9 @@ def forward( x_rec: torch.Tensor, latent_dist: torch.Tensor, ): - x_shape = x.shape - elbo = vae_elbo_loss(x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, z_dim=self.z_dim) - if self.return_latent_rec: - return -elbo, z.detach().cpu().numpy(), \ - x_rec.view(-1, x_shape[0], x_shape[1]).detach().cpu().numpy() - return -elbo + return vae_elbo_loss( + x=x, z=z, x_rec=x_rec, + latent_dist=latent_dist, model_precision=self.model_precision, + z_dim=self.z_dim + ) + From 1a3cee184588b86126ac7bfc8366a7dab6a618f9 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 20:50:04 -0500 Subject: [PATCH 108/150] Fix returned values and what we pass to loss in src/vak/models/vae_model.py; do not set batch size of 1 inside validation_step --- src/vak/models/vae_model.py | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) diff --git a/src/vak/models/vae_model.py b/src/vak/models/vae_model.py index 7fe872ccc..b3b348b93 100644 --- a/src/vak/models/vae_model.py +++ b/src/vak/models/vae_model.py @@ -51,29 +51,25 @@ def training_step(self, batch: tuple, batch_idx: int): """ """ x = batch["x"] - out, _ = self.network(x) - z, latent_dist = itemgetter('z', 'latent_dist')(_) - loss = self.loss(x, z, out, latent_dist) + x_rec, z, latent_dist = self.network(x) + loss = self.loss(x, z, x_rec, latent_dist) self.log("train_loss", loss) return loss def validation_step(self, batch: tuple, batch_idx: int): x = 
batch["x"] - out, _ = self.network(x) - z, latent_dist = itemgetter('z', 'latent_dist')(_) + x_rec, z, latent_dist = self.network(x) for metric_name, metric_callable in self.metrics.items(): if metric_name == "loss": self.log( f"val_{metric_name}", - metric_callable(x, z, out, latent_dist), - batch_size=1, + metric_callable(x, z, x_rec, latent_dist), on_step=True, ) elif metric_name == "acc": self.log( f"val_{metric_name}", - metric_callable(out, x), - batch_size=1, + metric_callable(x_rec, x), on_step=True, ) From 6b2e6895a3dcd3962a40f37c376d0b590be64e3c Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:14:18 -0500 Subject: [PATCH 109/150] Add/use normalize parameter in src/vak/prep/unit_dataset/unit_dataset.py --- src/vak/prep/unit_dataset/unit_dataset.py | 22 ++++++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 6cde331f3..11aeba6a2 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -138,7 +138,8 @@ def spectrogram_from_segment( segment: Segment, spect_params: SpectParamsConfig, max_dur: float | None = None, - target_shape: tuple[int, int] | None = None + target_shape: tuple[int, int] | None = None, + normalize: bool = True, ) -> npt.NDArray: """Compute a spectrogram given a :class:`Segment` instance. @@ -160,6 +161,9 @@ def spectrogram_from_segment( The transformation is only applied if both this parameter and ``max_dur`` are specified. Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. 
Returns ------- @@ -190,6 +194,10 @@ def spectrogram_from_segment( ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=-1 / 1e12) s = r((ttnew, ffnew)).T + if normalize: + s_max, s_min = s.max(), s.min() + s = (s - s_min) / (s_max - s_min) + s = np.clip(s, 0.0, 1.0) return s @@ -251,6 +259,7 @@ def make_spect_return_record( output_dir: pathlib.Path, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, + normalize: bool = True, ) -> tuple: """Helper function that enables parallelized creation of "records", i.e. rows for dataframe, from . @@ -262,6 +271,7 @@ def make_spect_return_record( spect_params, max_dur, target_shape, + normalize, ) n_timebins = spect.shape[-1] @@ -323,8 +333,8 @@ def pad_spectrogram(record: tuple, pad_length: float) -> None: def prep_unit_dataset( audio_format: str, - output_dir: str, - spect_params: dict, + output_dir: str | pathlib.Path, + spect_params: SpectParamsConfig, data_dir: str | pathlib.Path, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, @@ -332,6 +342,7 @@ def prep_unit_dataset( context_s: float = 0.005, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, + normalize: bool = True, ) -> tuple[pd.DataFrame, tuple[int]]: """Prepare a dataset of units from sequences, e.g., all syllables segmented out of a dataset of birdsong. @@ -384,6 +395,9 @@ def prep_unit_dataset( The transformation is only applied if both this parameter and ``max_dur`` are specified. Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. 
Returns ------- @@ -482,7 +496,7 @@ def prep_unit_dataset( records_n_timebins_tuples = [] for ind, segment in enumerate(segments): records_n_timebins_tuple = make_spect_return_record( - segment, ind, spect_params, output_dir, max_dur, target_shape, + segment, ind, spect_params, output_dir, max_dur, target_shape, normalize, ) records_n_timebins_tuples.append(records_n_timebins_tuple) with ProgressBar(): From b8be3a9736b47ffa53f0b86e69073e5c7db4c546 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:14:27 -0500 Subject: [PATCH 110/150] Add/use normalize parameter in src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 7efa006d0..ac76105a7 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -8,6 +8,7 @@ import pandas as pd from ...common import labels +from ...config.spect_params import SpectParamsConfig from .. import dataset_df_helper, split from ..unit_dataset import prep_unit_dataset from ..parametric_umap import dataset_arrays @@ -22,13 +23,14 @@ def prep_segment_vae_dataset( dataset_csv_path: str | pathlib.Path, purpose: str, audio_format: str | None = None, - spect_params: dict | None = None, + spect_params: SpectParamsConfig | None = None, annot_format: str | None = None, annot_file: str | pathlib.Path | None = None, labelset: set | None = None, context_s: float = 0.005, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, + normalize: bool = True, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -87,6 +89,9 @@ def prep_segment_vae_dataset( The transformation is only applied if both this parameter and ``max_dur`` are specified. Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. train_dur : float Total duration of training set, in seconds. 
When creating a learning curve, @@ -126,6 +131,7 @@ def prep_segment_vae_dataset( context_s, max_dur, target_shape, + normalize, ) if dataset_df.empty: raise ValueError( From ef8964eceb7b955c94ff5b86d03d46a37199b1c6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:14:35 -0500 Subject: [PATCH 111/150] Add/use normalize parameter in src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index c43065e6e..0a1bd005f 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -39,6 +39,7 @@ def prep_vae_dataset( context_s: float = 0.015, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, + normalize: bool = True, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -118,6 +119,10 @@ def prep_vae_dataset( Default is None. This parameter is only used for segment-VAE datasets. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. This parameter is only used for + segment-VAE datasets. train_dur : float Total duration of training set, in seconds. When creating a learning curve, @@ -279,6 +284,7 @@ def prep_vae_dataset( context_s, max_dur, target_shape, + normalize, train_dur, val_dur, test_dur, From f8fbca9ca1d99963586895723017334043220668 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:14:43 -0500 Subject: [PATCH 112/150] Add/use normalize parameter in src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index 1e0e4a06a..d6fb216d3 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -34,6 +34,7 @@ def prep( context_s: float = 0.015, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, + normalize: bool = True, ): """Prepare datasets for use with neural network models. 
@@ -159,7 +160,7 @@ def prep( that value (in seconds) will be omitted from the dataset. Default is None. This parameter is only used for - vae-segment datasets. + segment-VAE datasets. target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). @@ -170,7 +171,11 @@ def prep( parameter and ``max_dur`` are specified. Default is None. This parameter is only used for - vae-segment datasets. + segment-VAE datasets. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. This parameter is only used for + segment-VAE dataset. Returns ------- @@ -276,6 +281,7 @@ def prep( context_s, max_dur, target_shape, + normalize, train_dur, val_dur, test_dur, From 1df6af2d051d90ff6dd02b1c0c1a4145dbe64b2e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:14:58 -0500 Subject: [PATCH 113/150] Add normalize attribute to PrepConfig in src/vak/config/prep.py --- src/vak/config/prep.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vak/config/prep.py b/src/vak/config/prep.py index 4281981d1..3852045a3 100644 --- a/src/vak/config/prep.py +++ b/src/vak/config/prep.py @@ -262,6 +262,9 @@ def is_valid_context_s(self, attribute, value): validator=validators.optional(is_valid_target_shape), default=None ) + normalize = attr.ib( + validator=instance_of(bool), default=True + ) def __attrs_post_init__(self): if self.audio_format is not None and self.spect_format is not None: From 2ac7c9a1549927004452019d62f541059d20f566 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:15:10 -0500 Subject: [PATCH 114/150] Add normalize option to PREP section in src/vak/config/valid.toml --- src/vak/config/valid.toml | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vak/config/valid.toml b/src/vak/config/valid.toml index 0334c264a..ff19f97b3 100644 --- a/src/vak/config/valid.toml +++ b/src/vak/config/valid.toml @@ -24,6 +24,7 @@ num_replicates = 2 context_s = 0.005 max_dur = 0.2 target_shape = [128, 
128] +normalize = true [SPECT_PARAMS] fft_size = 512 From 41bd1e13c8eca286999861db7633cce1e52c372a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 21:15:23 -0500 Subject: [PATCH 115/150] Pass normalize argument into vak.prep in src/vak/cli/prep.py --- src/vak/cli/prep.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vak/cli/prep.py b/src/vak/cli/prep.py index 92e36104b..a3f987a16 100644 --- a/src/vak/cli/prep.py +++ b/src/vak/cli/prep.py @@ -142,6 +142,7 @@ def prep(toml_path): context_s=cfg.prep.context_s, max_dur=cfg.prep.max_dur, target_shape=cfg.prep.target_shape, + normalize=cfg.prep.normalize, ) # use config and section from above to add dataset_path to config.toml file From 2540114096f536191b05fb8825eb3178c2c081f8 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 22:35:41 -0500 Subject: [PATCH 116/150] Fix spectrogram_from_segment to use min value of spectrogram as fill value for interpolation --- src/vak/prep/unit_dataset/unit_dataset.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 11aeba6a2..77682537a 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -184,6 +184,7 @@ def spectrogram_from_segment( spect_params.transform_type, spect_params.freq_cutoffs, ) + s_max, s_min = s.max(), s.min() if max_dur and target_shape: # if max_dur and target_shape are specified we interpolate spectrogram to target shape, like AVA target_freqs = np.linspace(f.min(), f.max(), target_shape[0]) @@ -192,10 +193,9 @@ def spectrogram_from_segment( shoulder = 0.5 * (max_dur - new_duration) target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) - r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=-1 / 1e12) + r = 
RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=s_min) s = r((ttnew, ffnew)).T if normalize: - s_max, s_min = s.max(), s.min() s = (s - s_min) / (s_max - s_min) s = np.clip(s, 0.0, 1.0) return s From f1fc26ba11ee9c9892b2efa49a49621372e66a5e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Tue, 9 Jan 2024 23:08:20 -0500 Subject: [PATCH 117/150] Compute L2 values in vae elbo loss the same way AVA does --- src/vak/nn/loss/vae.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index e2c588338..9270763e1 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -17,12 +17,17 @@ def vae_elbo_loss( z_dim: int ): # E_{q(z|x)} p(z) - elbo = -0.5 * ( torch.sum( torch.pow(z, 2) ) + z_dim * torch.log( 2 * PI )) + elbo = -0.5 * (torch.sum(torch.pow(z, 2) ) + z_dim * torch.log( 2 * PI )) # E_{q(z|x)} p(x|z) x_dim = np.prod(x.shape[1:]) pxz_term = -0.5 * x_dim * (torch.log(2 * PI / model_precision)) - l2s = torch.sum(torch.pow(x - x_rec, 2), dim=1) + l2s = torch.sum( + torch.pow( + x.view(x.shape[0], -1) - x_rec.view(x_rec.shape[0], -1), + 2), + dim=1 + ) pxz_term = pxz_term - 0.5 * model_precision * torch.sum(l2s) elbo = elbo + pxz_term From 60fadbe3a3036f2d7790ff37715364a58be24881 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 21:44:38 -0500 Subject: [PATCH 118/150] Rewrite src/vak/prep/unit_dataset/unit_dataset.py so we can use mean of spectrograms as fill value for interpolation --- src/vak/prep/unit_dataset/unit_dataset.py | 216 ++++++++++++++++------ 1 file changed, 158 insertions(+), 58 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 77682537a..fc32717d6 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -137,8 +137,6 @@ def get_segment_list( def spectrogram_from_segment( segment: Segment, spect_params: 
SpectParamsConfig, - max_dur: float | None = None, - target_shape: tuple[int, int] | None = None, normalize: bool = True, ) -> npt.NDArray: """Compute a spectrogram given a :class:`Segment` instance. @@ -147,20 +145,6 @@ def spectrogram_from_segment( ---------- segment : Segment spect_params : SpectParamsConfig - max_dur : float - Maximum duration for segments. - Used with ``target_shape`` when reshaping - the spectrogram via interpolation. - Default is None. - target_shape : tuple - Of ints, (target number of frequency bins, - target number of time bins). - Spectrograms of units will be reshaped - by interpolation to have the specified - number of frequency and time bins. - The transformation is only applied if both this - parameter and ``max_dur`` are specified. - Default is None. normalize : bool If True, min-max normalize the spectrogram. Default is True. @@ -184,21 +168,12 @@ def spectrogram_from_segment( spect_params.transform_type, spect_params.freq_cutoffs, ) - s_max, s_min = s.max(), s.min() - if max_dur and target_shape: - # if max_dur and target_shape are specified we interpolate spectrogram to target shape, like AVA - target_freqs = np.linspace(f.min(), f.max(), target_shape[0]) - duration = t.max() - t.min() - new_duration = np.sqrt(duration * max_dur) # stretched duration - shoulder = 0.5 * (max_dur - new_duration) - target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) - ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) - r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=s_min) - s = r((ttnew, ffnew)).T + if normalize: + s_max, s_min = s.max(), s.min() s = (s - s_min) / (s_max - s_min) s = np.clip(s, 0.0, 1.0) - return s + return s, f, t @attrs.define @@ -209,6 +184,8 @@ class SpectToSave: """ spect: npt.NDArray + f: npt.NDArray + t: npt.NDArray ind: int audio_path: str @@ -228,18 +205,24 @@ def save_spect( Returns ------- - npy_path : str - Path to npy file 
containing spectrogram inside ``output_dir`` + npz_path : str + Path to npz file containing spectrogram inside ``output_dir`` """ + spect_dict = { + "s": spect_to_save.spect, + "f": spect_to_save.f, + "t": spect_to_save.t, + } + basename = ( os.path.basename(spect_to_save.audio_path) + f"-segment-{spect_to_save.ind}" ) - npy_path = os.path.join( - os.path.normpath(output_dir), basename + ".spect.npy" + npz_path = os.path.join( + os.path.normpath(output_dir), basename + ".spect.npz" ) - np.save(npy_path, spect_to_save.spect) - return npy_path + np.savez(npz_path, **spect_dict) + return npz_path def abspath(a_path): @@ -257,25 +240,21 @@ def make_spect_return_record( ind: int, spect_params: SpectParamsConfig, output_dir: pathlib.Path, - max_dur: float | None = None, - target_shape: tuple[int, int] | None = None, normalize: bool = True, -) -> tuple: +) -> tuple[tuple, int, float]: """Helper function that enables parallelized creation of "records", i.e. rows for dataframe, from . Accepts a two-element tuple containing (1) a dictionary that represents a spectrogram and (2) annotation for that file""" - spect = spectrogram_from_segment( + s, f, t = spectrogram_from_segment( segment, spect_params, - max_dur, - target_shape, normalize, ) - n_timebins = spect.shape[-1] + n_timebins = s.shape[-1] - spect_to_save = SpectToSave(spect, ind, segment.audio_path) + spect_to_save = SpectToSave(s, f, t, ind, segment.audio_path) spect_path = save_spect(spect_to_save, output_dir) record = tuple( [ @@ -291,21 +270,31 @@ def make_spect_return_record( ] ) - return record, n_timebins + return record, n_timebins, s.mean() @dask.delayed def pad_spectrogram(record: tuple, pad_length: float) -> None: """Pads a spectrogram to a specified length on the left and right sides. + Spectrogram is saved again after padding. Parameters ---------- record : tuple + Returned by :func:`make_spect_return_record`, + has path to spectrogram file. 
pad_length : int + Length to which spectrogram should be padded. + + Returns + ------- + shape : tuple + Shape of spectrogram after padding. """ spect_path = record[0] # 'spect_path' - spect = np.load(spect_path) + spect_dict = np.load(spect_path) + spect = spect_dict["s"] excess_needed = pad_length - spect.shape[-1] pad_left = np.floor(float(excess_needed) / 2).astype("int") @@ -313,8 +302,83 @@ def pad_spectrogram(record: tuple, pad_length: float) -> None: spect_padded = np.pad( spect, [(0, 0), (pad_left, pad_right)], "constant", constant_values=0 ) - np.save(spect_path, spect_padded) - return spect_padded.shape + new_spect_path = str(spect_path).replace(".npz", ".npy") + np.save(new_spect_path, spect_padded) + return new_spect_path, spect_padded.shape + + +@dask.delayed +def interp_spectrogram( + record: tuple, + fill_value: float, + max_dur: float, + target_shape: tuple[int, int], + normalize: bool = True, +): + """Linearly interpolate a spectrogram to a target shape. + + Spectrogram is saved again after interpolation. + + Uses :func:`scipy.interpolate.RegularGridInterpolator` + to treat the spectrogram as if it were a function of the + frequencies vector :math:`f` and the times vector :math:`t`, + then interpolates given new frequencies and times + with the same range but with the number of values + specified by the argument ``target_shape``. + + Parameters + ---------- + record : tuple + Returned by :func:`make_spect_return_record`, + has path to spectrogram file. + fill_value : float + Value to fill in when the approximated function + is extrapolating outside of data. + max_dur : float + Maximum duration for segments. + Used with ``target_shape`` when reshaping + the spectrogram via interpolation. + Default is None. + target_shape : tuple + Of ints, (target number of frequency bins, + target number of time bins). + Spectrograms of units will be reshaped + by interpolation to have the specified + number of frequency and time bins. 
+ The transformation is only applied if both this + parameter and ``max_dur`` are specified. + Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Default is True. + + Returns + ------- + shape : tuple + Shape of spectrogram after interpolation. + """ + spect_path = record[0] # 'spect_path' + spect_dict = np.load(spect_path) + s = spect_dict["s"] + f = spect_dict["f"] + t = spect_dict["t"] + + # if max_dur and target_shape are specified we interpolate spectrogram to target shape, like AVA + target_freqs = np.linspace(f.min(), f.max(), target_shape[0]) + duration = t.max() - t.min() + new_duration = np.sqrt(duration * max_dur) # stretched duration + shoulder = 0.5 * (max_dur - new_duration) + target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) + ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) + r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=fill_value) + s = r((ttnew, ffnew)).T + if normalize: + s_max, s_min = s.max(), s.min() + s = (s - s_min) / (s_max - s_min) + s = np.clip(s, 0.0, 1.0) + new_spect_path = str(spect_path).replace(".npz", ".npy") + np.save(new_spect_path, s) + return new_spect_path, s.shape # constant, used for names of columns in DataFrame below @@ -496,7 +560,7 @@ def prep_unit_dataset( records_n_timebins_tuples = [] for ind, segment in enumerate(segments): records_n_timebins_tuple = make_spect_return_record( - segment, ind, spect_params, output_dir, max_dur, target_shape, normalize, + segment, ind, spect_params, output_dir, normalize, ) records_n_timebins_tuples.append(records_n_timebins_tuple) with ProgressBar(): @@ -504,26 +568,62 @@ def prep_unit_dataset( *records_n_timebins_tuples ) - records, n_timebins_list = [], [] + # we use n_timebins to pad to the same length, + # and spect_means to fill with the mean across all spectrograms + # when we interpolate + records, n_timebins_list, spect_means_list = [], [], [] for 
records_n_timebins_tuple in records_n_timebins_tuples: - record, n_timebins = records_n_timebins_tuple + record, n_timebins, spect_mean = records_n_timebins_tuple records.append(record) n_timebins_list.append(n_timebins) + spect_means_list.append(spect_mean) + + # ---- either interpolate or pad spectrograms so they are all the same size + if max_dur is not None and target_shape is not None: + # then we interpolate + spect_mean = np.array(spect_means_list).mean() + + interpolated = [] + for record in records: + interpolated.append( + interp_spectrogram( + record, spect_mean, max_dur, target_shape, normalize + )) + with ProgressBar(): + path_shape_tuples = dask.compute(*interpolated) - pad_length = max(n_timebins_list) - - padded = [] - for record in records: - padded.append(pad_spectrogram(record, pad_length)) - with ProgressBar(): - shapes: list[tuple[int, int]] = dask.compute(*padded) - + else: + # then we pad + pad_length = max(n_timebins_list) + + padded = [] + for record in records: + padded.append(pad_spectrogram(record, pad_length)) + with ProgressBar(): + path_shape_tuples = dask.compute(*padded) + + # ---- clean up npz files with spectrograms, don't need anymore + npz_files = sorted(output_dir.glob('*npz')) + for npz_file in npz_files: + npz_file.unlink() + + paths, shapes = [], [] + for path, shape in path_shape_tuples: + paths.append(path) + shapes.append(shape) shape = set(shapes) assert ( len(shape) == 1 ), f"Did not find a single unique shape for all spectrograms. 
Instead found: {shape}" shape = shape.pop() - unit_df = pd.DataFrame.from_records(records, columns=DF_COLUMNS) + new_records = [] + for record, path in zip(records, paths): + new_records.append( + tuple( + [path, *record[1:]] + ) + ) + unit_df = pd.DataFrame.from_records(new_records, columns=DF_COLUMNS) return unit_df, shape From 869359df39a07ac4f6916d45e6398797698c4670 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:00:04 -0500 Subject: [PATCH 119/150] Add min_val, max_val, and normalize attributes to SpectParamsConfig --- src/vak/config/spect_params.py | 35 +++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/vak/config/spect_params.py b/src/vak/config/spect_params.py index 4a61942a6..cc17aabed 100644 --- a/src/vak/config/spect_params.py +++ b/src/vak/config/spect_params.py @@ -15,7 +15,9 @@ def freq_cutoffs_validator(instance, attribute, value): ) -VALID_TRANSFORM_TYPES = {"log_spect", "log_spect_plus_one"} +VALID_TRANSFORM_TYPES = { + "log", "log_spect", "log_spect_plus_one" +} def is_valid_transform_type(instance, attribute, value): @@ -57,6 +59,24 @@ class SpectParamsConfig: audio_path_key : str key for accessing path to source audio file for spectogram in files. Default is 'audio_path'. + min_val : float, optional + Minimum value to allow in spectrogram. + All values less than this will be set to this value. + This operation is applied *after* the transform + specified by ``transform_type``. + Default is None. + max_val : float, optional + Maximum value to allow in spectrogram. + All values greater than this will be set to this value. + This operation is applied *after* the transform + specified by ``transform_type``. + Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Normalization is done *after* the transform + specified by ``transform_type``, and *after* + the ``min_val`` and ``max_val`` operations. + Default is False. 
""" fft_size = attr.ib(converter=int, validator=instance_of(int), default=512) @@ -79,3 +99,16 @@ class SpectParamsConfig: freqbins_key = attr.ib(validator=instance_of(str), default="f") timebins_key = attr.ib(validator=instance_of(str), default="t") audio_path_key = attr.ib(validator=instance_of(str), default="audio_path") + min_val = attr.ib( + validator=validators.optional(instance_of(float)), + default=None + ) + max_val = attr.ib( + validator=validators.optional(instance_of(float)), + default=None + ) + normalize = attr.ib( + validator=instance_of(bool), + default=False, + ) + From 7ad1d0721ad45e14e57e3d04fe34adf12afd44c4 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:01:11 -0500 Subject: [PATCH 120/150] Add min_val, max_val, and normalize parameters to spectrogram function, and add 'log' transform --- src/vak/prep/spectrogram_dataset/spect.py | 35 ++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/spectrogram_dataset/spect.py b/src/vak/prep/spectrogram_dataset/spect.py index 602da575a..8b92fff4f 100644 --- a/src/vak/prep/spectrogram_dataset/spect.py +++ b/src/vak/prep/spectrogram_dataset/spect.py @@ -35,6 +35,9 @@ def spectrogram( thresh: float | None = None, transform_type: str | None = None, freq_cutoffs: list[int, int] | None = None, + min_val: float | None = None, + max_val: float | None = None, + normalize: bool = False, ) -> tuple[npt.NDArray, npt.NDArray, npt.NDArray]: """creates a spectrogram @@ -57,6 +60,24 @@ def spectrogram( threshold minimum power for log spectrogram freq_cutoffs : tuple of two elements, lower and higher frequencies. + min_val : float, optional + Minimum value to allow in spectrogram. + All values less than this will be set to this value. + This operation is applied *after* the transform + specified by ``transform_type``. + Default is None. + max_val : float, optional + Maximum value to allow in spectrogram. 
+ All values greater than this will be set to this value. + This operation is applied *after* the transform + specified by ``transform_type``. + Default is None. + normalize : bool + If True, min-max normalize the spectrogram. + Normalization is done *after* the transform + specified by ``transform_type``, and *after* + the ``min_val`` and ``max_val`` operations. + Default is False. Return ------ @@ -80,7 +101,9 @@ def spectrogram( )[:3] if transform_type: - if transform_type == "log_spect": + if transform_type == "log": + spect = np.log(np.abs(spect) + np.finfo(spect).eps) + elif transform_type == "log_spect": spect /= spect.max() # volume normalize to max 1 spect = np.log10(spect) # take log if thresh: @@ -96,6 +119,16 @@ def spectrogram( spect < thresh ] = thresh # set anything less than the threshold as the threshold + if min_val: + spect[spect < min_val] = min_val + if max_val: + spect[spect > max_val] = max_val + + if normalize: + s_max, s_min = spect.max(), spect.min() + spect = (spect - s_min) / (s_max - s_min) + spect = np.clip(spect, 0.0, 1.0) + if freq_cutoffs: f_inds = np.nonzero( (freqbins >= freq_cutoffs[0]) & (freqbins < freq_cutoffs[1]) From 8a8fdd1a10fc800c85eabf0e268d6ff786b1f281 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:08:52 -0500 Subject: [PATCH 121/150] Remove using spectrograms mean as fill value in src/vak/prep/unit_dataset/unit_dataset.py, remove normalize parameter from functions, instead use spect_params.normalize + spect_params.min_val + spect_params.max_val in call to spectrogram function --- src/vak/prep/unit_dataset/unit_dataset.py | 39 ++++++++--------------- 1 file changed, 13 insertions(+), 26 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index fc32717d6..67147c5f2 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -137,7 +137,6 @@ def get_segment_list( def spectrogram_from_segment( 
segment: Segment, spect_params: SpectParamsConfig, - normalize: bool = True, ) -> npt.NDArray: """Compute a spectrogram given a :class:`Segment` instance. @@ -145,9 +144,7 @@ def spectrogram_from_segment( ---------- segment : Segment spect_params : SpectParamsConfig - normalize : bool - If True, min-max normalize the spectrogram. - Default is True. + Returns ------- @@ -167,12 +164,11 @@ def spectrogram_from_segment( spect_params.thresh, spect_params.transform_type, spect_params.freq_cutoffs, + spect_params.min_val, + spect_params.max_val, + spect_params.normalize, ) - if normalize: - s_max, s_min = s.max(), s.min() - s = (s - s_min) / (s_max - s_min) - s = np.clip(s, 0.0, 1.0) return s, f, t @@ -240,7 +236,6 @@ def make_spect_return_record( ind: int, spect_params: SpectParamsConfig, output_dir: pathlib.Path, - normalize: bool = True, ) -> tuple[tuple, int, float]: """Helper function that enables parallelized creation of "records", i.e. rows for dataframe, from . @@ -250,7 +245,7 @@ def make_spect_return_record( s, f, t = spectrogram_from_segment( segment, spect_params, - normalize, + ) n_timebins = s.shape[-1] @@ -307,10 +302,13 @@ def pad_spectrogram(record: tuple, pad_length: float) -> None: return new_spect_path, spect_padded.shape +# what AVA uses +FILL_VALUE = -1 / 1e-12 + + @dask.delayed def interp_spectrogram( record: tuple, - fill_value: float, max_dur: float, target_shape: tuple[int, int], normalize: bool = True, @@ -331,9 +329,6 @@ def interp_spectrogram( record : tuple Returned by :func:`make_spect_return_record`, has path to spectrogram file. - fill_value : float - Value to fill in when the approximated function - is extrapolating outside of data. max_dur : float Maximum duration for segments. 
Used with ``target_shape`` when reshaping @@ -370,7 +365,7 @@ def interp_spectrogram( shoulder = 0.5 * (max_dur - new_duration) target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) - r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=fill_value) + r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=FILL_VALUE) s = r((ttnew, ffnew)).T if normalize: s_max, s_min = s.max(), s.min() @@ -406,7 +401,6 @@ def prep_unit_dataset( context_s: float = 0.005, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, - normalize: bool = True, ) -> tuple[pd.DataFrame, tuple[int]]: """Prepare a dataset of units from sequences, e.g., all syllables segmented out of a dataset of birdsong. @@ -459,9 +453,6 @@ def prep_unit_dataset( The transformation is only applied if both this parameter and ``max_dur`` are specified. Default is None. - normalize : bool - If True, min-max normalize the spectrogram. - Default is True. 
Returns ------- @@ -560,7 +551,7 @@ def prep_unit_dataset( records_n_timebins_tuples = [] for ind, segment in enumerate(segments): records_n_timebins_tuple = make_spect_return_record( - segment, ind, spect_params, output_dir, normalize, + segment, ind, spect_params, output_dir, ) records_n_timebins_tuples.append(records_n_timebins_tuple) with ProgressBar(): @@ -571,23 +562,19 @@ def prep_unit_dataset( # we use n_timebins to pad to the same length, # and spect_means to fill with the mean across all spectrograms # when we interpolate - records, n_timebins_list, spect_means_list = [], [], [] + records, n_timebins_list = [], [] for records_n_timebins_tuple in records_n_timebins_tuples: record, n_timebins, spect_mean = records_n_timebins_tuple records.append(record) n_timebins_list.append(n_timebins) - spect_means_list.append(spect_mean) # ---- either interpolate or pad spectrograms so they are all the same size if max_dur is not None and target_shape is not None: - # then we interpolate - spect_mean = np.array(spect_means_list).mean() - interpolated = [] for record in records: interpolated.append( interp_spectrogram( - record, spect_mean, max_dur, target_shape, normalize + record, max_dur, target_shape, spect_params.normalize )) with ProgressBar(): path_shape_tuples = dask.compute(*interpolated) From 6ae1e3b5a72b7954fdeb7eb4bc25e33feb441102 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:18:24 -0500 Subject: [PATCH 122/150] Remove normalize attribute from PrepConfig --- src/vak/config/prep.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/vak/config/prep.py b/src/vak/config/prep.py index 3852045a3..4281981d1 100644 --- a/src/vak/config/prep.py +++ b/src/vak/config/prep.py @@ -262,9 +262,6 @@ def is_valid_context_s(self, attribute, value): validator=validators.optional(is_valid_target_shape), default=None ) - normalize = attr.ib( - validator=instance_of(bool), default=True - ) def __attrs_post_init__(self): if self.audio_format is not 
None and self.spect_format is not None: From 32a0d47393f43c031c08ae1466f2b92acf24ce52 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:18:41 -0500 Subject: [PATCH 123/150] Add options to SPECT_PARAMS in valid_toml, remove normalize option from PREP --- src/vak/config/valid.toml | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/vak/config/valid.toml b/src/vak/config/valid.toml index ff19f97b3..d7da02f18 100644 --- a/src/vak/config/valid.toml +++ b/src/vak/config/valid.toml @@ -24,7 +24,6 @@ num_replicates = 2 context_s = 0.005 max_dur = 0.2 target_shape = [128, 128] -normalize = true [SPECT_PARAMS] fft_size = 512 @@ -36,6 +35,9 @@ spect_key = 's' freqbins_key = 'f' timebins_key = 't' audio_path_key = 'audio_path' +min_val = -6.0 +max_val = 0.0 +normalize = true [TRAIN] model = 'TweetyNet' From dd9fcf5952627fec9d9bb31c41e0dc919489097f Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:22:15 -0500 Subject: [PATCH 124/150] Remove normalize paramter from src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index 0a1bd005f..131d735b1 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -39,7 +39,6 @@ def prep_vae_dataset( context_s: float = 0.015, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, - normalize: bool = True, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -119,10 +118,6 @@ def prep_vae_dataset( Default is None. This parameter is only used for segment-VAE datasets. - normalize : bool - If True, min-max normalize the spectrogram. - Default is True. This parameter is only used for - segment-VAE datasets. train_dur : float Total duration of training set, in seconds. 
When creating a learning curve, From 23e8244bfd0331a7828d8f22b48642ea0e606226 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:23:00 -0500 Subject: [PATCH 125/150] Remove normalize parameter from src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index ac76105a7..1aedf7fad 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -30,7 +30,6 @@ def prep_segment_vae_dataset( context_s: float = 0.005, max_dur: float | None = None, target_shape: tuple[int, int] | None = None, - normalize: bool = True, train_dur: int | None = None, val_dur: int | None = None, test_dur: int | None = None, @@ -89,9 +88,6 @@ def prep_segment_vae_dataset( The transformation is only applied if both this parameter and ``max_dur`` are specified. Default is None. - normalize : bool - If True, min-max normalize the spectrogram. - Default is True. train_dur : float Total duration of training set, in seconds. 
When creating a learning curve, @@ -131,7 +127,6 @@ def prep_segment_vae_dataset( context_s, max_dur, target_shape, - normalize, ) if dataset_df.empty: raise ValueError( From 89ec944341f6483238464ad690c561596c68ebec Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:23:09 -0500 Subject: [PATCH 126/150] Fix up remove normalize from src/vak/prep/vae/vae.py --- src/vak/prep/vae/vae.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index 131d735b1..c43065e6e 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -279,7 +279,6 @@ def prep_vae_dataset( context_s, max_dur, target_shape, - normalize, train_dur, val_dur, test_dur, From 694a30b06e55e388d360a91badd3c0e0125e69e0 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Wed, 10 Jan 2024 22:23:19 -0500 Subject: [PATCH 127/150] Remove normalize parameter from src/vak/prep/prep_.py --- src/vak/prep/prep_.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index d6fb216d3..14e729822 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -172,10 +172,6 @@ def prep( Default is None. This parameter is only used for segment-VAE datasets. - normalize : bool - If True, min-max normalize the spectrogram. - Default is True. This parameter is only used for - segment-VAE dataset. 
Returns ------- @@ -281,7 +277,6 @@ def prep( context_s, max_dur, target_shape, - normalize, train_dur, val_dur, test_dur, From 18a5029f019c2729014d9fc22a077e0a31c64fbd Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 09:36:40 -0500 Subject: [PATCH 128/150] Remove normalize arg (no longer exists) in call to prep in src/vak/cli/prep.py --- src/vak/cli/prep.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vak/cli/prep.py b/src/vak/cli/prep.py index a3f987a16..92e36104b 100644 --- a/src/vak/cli/prep.py +++ b/src/vak/cli/prep.py @@ -142,7 +142,6 @@ def prep(toml_path): context_s=cfg.prep.context_s, max_dur=cfg.prep.max_dur, target_shape=cfg.prep.target_shape, - normalize=cfg.prep.normalize, ) # use config and section from above to add dataset_path to config.toml file From 22c71ccfdce80cb5c50b4bfa2215afe4ab1b7637 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 09:37:05 -0500 Subject: [PATCH 129/150] Fix use of finfo in src/vak/prep/spectrogram_dataset/spect.py --- src/vak/prep/spectrogram_dataset/spect.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/vak/prep/spectrogram_dataset/spect.py b/src/vak/prep/spectrogram_dataset/spect.py index 8b92fff4f..910344eb4 100644 --- a/src/vak/prep/spectrogram_dataset/spect.py +++ b/src/vak/prep/spectrogram_dataset/spect.py @@ -102,7 +102,7 @@ def spectrogram( if transform_type: if transform_type == "log": - spect = np.log(np.abs(spect) + np.finfo(spect).eps) + spect = np.log(np.abs(spect) + np.finfo(spect.dtype).eps) elif transform_type == "log_spect": spect /= spect.max() # volume normalize to max 1 spect = np.log10(spect) # take log From 0696e563c145d2971097a764f176ad47806ceb96 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 09:37:52 -0500 Subject: [PATCH 130/150] Fix how we get/use fill/pad value in src/vak/prep/unit_dataset/unit_dataset.py -- AVA default caused entire spectrogram of 1 value --- src/vak/prep/unit_dataset/unit_dataset.py 
| 20 +++++++++----------- 1 file changed, 9 insertions(+), 11 deletions(-) diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/unit_dataset/unit_dataset.py index 67147c5f2..6fbda062d 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/unit_dataset/unit_dataset.py @@ -245,7 +245,6 @@ def make_spect_return_record( s, f, t = spectrogram_from_segment( segment, spect_params, - ) n_timebins = s.shape[-1] @@ -269,7 +268,7 @@ def make_spect_return_record( @dask.delayed -def pad_spectrogram(record: tuple, pad_length: float) -> None: +def pad_spectrogram(record: tuple, pad_length: float, padval: float = 0.) -> None: """Pads a spectrogram to a specified length on the left and right sides. Spectrogram is saved again after padding. @@ -295,23 +294,20 @@ def pad_spectrogram(record: tuple, pad_length: float) -> None: pad_left = np.floor(float(excess_needed) / 2).astype("int") pad_right = np.ceil(float(excess_needed) / 2).astype("int") spect_padded = np.pad( - spect, [(0, 0), (pad_left, pad_right)], "constant", constant_values=0 + spect, [(0, 0), (pad_left, pad_right)], "constant", constant_values=padval ) new_spect_path = str(spect_path).replace(".npz", ".npy") np.save(new_spect_path, spect_padded) return new_spect_path, spect_padded.shape -# what AVA uses -FILL_VALUE = -1 / 1e-12 - - @dask.delayed def interp_spectrogram( record: tuple, max_dur: float, target_shape: tuple[int, int], normalize: bool = True, + fill_value: float = 0. ): """Linearly interpolate a spectrogram to a target shape. 
@@ -365,7 +361,7 @@ def interp_spectrogram( shoulder = 0.5 * (max_dur - new_duration) target_times = np.linspace(t.min() - shoulder, t.max() + shoulder, target_shape[1]) ttnew, ffnew = np.meshgrid(target_times, target_freqs, indexing='ij', sparse=True) - r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=FILL_VALUE) + r = RegularGridInterpolator((t, f), s.T, bounds_error=False, fill_value=fill_value) s = r((ttnew, ffnew)).T if normalize: s_max, s_min = s.max(), s.min() @@ -543,7 +539,7 @@ def prep_unit_dataset( segment for segment_list in segments for segment in segment_list ] - # ---- make and save all spectrograms *before* padding + # ---- make and save all spectrograms *before* interpolating or padding # This is a design choice to avoid keeping all the spectrograms in memory # but since we want to pad all spectrograms to be the same width, # it requires us to go back, load each one, and pad it. @@ -569,12 +565,14 @@ def prep_unit_dataset( n_timebins_list.append(n_timebins) # ---- either interpolate or pad spectrograms so they are all the same size + fill_value = spect_params.min_val if spect_params.min_val else 0. 
+ if max_dur is not None and target_shape is not None: interpolated = [] for record in records: interpolated.append( interp_spectrogram( - record, max_dur, target_shape, spect_params.normalize + record, max_dur, target_shape, spect_params.normalize, fill_value )) with ProgressBar(): path_shape_tuples = dask.compute(*interpolated) @@ -585,7 +583,7 @@ def prep_unit_dataset( padded = [] for record in records: - padded.append(pad_spectrogram(record, pad_length)) + padded.append(pad_spectrogram(record, pad_length, padval=fill_value)) with ProgressBar(): path_shape_tuples = dask.compute(*padded) From de4e58f91b8a4a079784a50c6dde0262b9e577fd Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 09:50:31 -0500 Subject: [PATCH 131/150] Rename 'unit_dataset' -> 'segment_dataset' in prep/ --- src/vak/prep/__init__.py | 4 +-- .../prep/parametric_umap/dataset_arrays.py | 4 +-- .../prep/parametric_umap/parametric_umap.py | 6 ++-- src/vak/prep/segment_dataset/__init__.py | 4 +++ .../segment_dataset.py} | 32 +++++++++++-------- src/vak/prep/unit_dataset/__init__.py | 4 --- src/vak/prep/vae/segment_vae.py | 6 ++-- 7 files changed, 33 insertions(+), 27 deletions(-) create mode 100644 src/vak/prep/segment_dataset/__init__.py rename src/vak/prep/{unit_dataset/unit_dataset.py => segment_dataset/segment_dataset.py} (95%) delete mode 100644 src/vak/prep/unit_dataset/__init__.py diff --git a/src/vak/prep/__init__.py b/src/vak/prep/__init__.py index 492da4dc6..503f030f3 100644 --- a/src/vak/prep/__init__.py +++ b/src/vak/prep/__init__.py @@ -7,7 +7,7 @@ prep_, sequence_dataset, spectrogram_dataset, - unit_dataset, + segment_dataset, vae, ) from .prep_ import prep @@ -22,6 +22,6 @@ "prep_", "sequence_dataset", "spectrogram_dataset", - "unit_dataset", + "segment_dataset", "vae", ] diff --git a/src/vak/prep/parametric_umap/dataset_arrays.py b/src/vak/prep/parametric_umap/dataset_arrays.py index 67e224ae7..bd9b3297a 100644 --- a/src/vak/prep/parametric_umap/dataset_arrays.py 
+++ b/src/vak/prep/parametric_umap/dataset_arrays.py @@ -17,14 +17,14 @@ def move_files_into_split_subdirs( ) -> None: """Move npy files in dataset into sub-directories, one for each split in the dataset. - This is run *after* calling :func:`vak.prep.unit_dataset.prep_unit_dataset` + This is run *after* calling :func:`vak.prep.segment_dataset.prep_segment_dataset` to generate ``dataset_df``. Parameters ---------- dataset_df : pandas.DataFrame A ``pandas.DataFrame`` returned by - :func:`vak.prep.unit_dataset.prep_unit_dataset` + :func:`vak.prep.segment_dataset.prep_segment_dataset` with a ``'split'`` column added, as a result of calling :func:`vak.prep.split.unit_dataframe` or because it was added "manually" by calling :func:`vak.core.prep.prep_helper.add_split_col` (as is done diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index eb8ceb29b..83b3ac03e 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -14,7 +14,7 @@ from ...common.logging import config_logging_for_cli, log_version from ...common.timenow import get_timenow_as_str from .. import dataset_df_helper, split -from ..unit_dataset import prep_unit_dataset +from ..segment_dataset import prep_segment_dataset from . 
import dataset_arrays logger = logging.getLogger(__name__) @@ -211,7 +211,7 @@ def prep_parametric_umap_dataset( logger.info(f"Will prepare dataset as directory: {dataset_path}") # ---- actually make the dataset ----------------------------------------------------------------------------------- - dataset_df, shape = prep_unit_dataset( + dataset_df, shape = prep_segment_dataset( audio_format=audio_format, output_dir=dataset_path, spect_params=spect_params, @@ -224,7 +224,7 @@ def prep_parametric_umap_dataset( if dataset_df.empty: raise ValueError( - "Calling `vak.prep.unit_dataset.prep_unit_dataset` " + "Calling `vak.prep.segment_dataset.prep_segment_dataset` " "with arguments passed to `vak.core.prep.prep_parametric_umap_dataset` " "returned an empty dataframe.\n" "Please double-check arguments to `vak.core.prep` function." diff --git a/src/vak/prep/segment_dataset/__init__.py b/src/vak/prep/segment_dataset/__init__.py new file mode 100644 index 000000000..2ba4ab7f5 --- /dev/null +++ b/src/vak/prep/segment_dataset/__init__.py @@ -0,0 +1,4 @@ +from . import segment_dataset +from .segment_dataset import prep_segment_dataset + +__all__ = ["prep_segment_dataset", "segment_dataset"] diff --git a/src/vak/prep/unit_dataset/unit_dataset.py b/src/vak/prep/segment_dataset/segment_dataset.py similarity index 95% rename from src/vak/prep/unit_dataset/unit_dataset.py rename to src/vak/prep/segment_dataset/segment_dataset.py index 6fbda062d..e84f2945c 100644 --- a/src/vak/prep/unit_dataset/unit_dataset.py +++ b/src/vak/prep/segment_dataset/segment_dataset.py @@ -1,5 +1,5 @@ -"""Functions for making a dataset of units from sequences, -as used to train dimensionality reduction models.""" +"""Functions for making a dataset of segments, +as used to train parametric UMAP and AVA models.""" from __future__ import annotations import logging @@ -90,7 +90,7 @@ def get_segment_list( Notes ----- Function used by - :func:`vak.prep.unit_dataset.prep_unit_dataset`. 
+ :func:`vak.prep.segment_dataset.prep_segment_dataset`. """ data, samplerate = constants.AUDIO_FORMAT_FUNC_MAP[audio_format]( audio_path @@ -153,7 +153,7 @@ def spectrogram_from_segment( Notes ----- Function used by - :func:`vak.prep.unit_dataset.prep_unit_dataset`. + :func:`vak.prep.segment_dataset.prep_segment_dataset`. """ data, samplerate = np.array(segment.data), segment.samplerate s, f, t = spectrogram( @@ -386,7 +386,7 @@ def interp_spectrogram( ] -def prep_unit_dataset( +def prep_segment_dataset( audio_format: str, output_dir: str | pathlib.Path, spect_params: SpectParamsConfig, @@ -398,8 +398,14 @@ def prep_unit_dataset( max_dur: float | None = None, target_shape: tuple[int, int] | None = None, ) -> tuple[pd.DataFrame, tuple[int]]: - """Prepare a dataset of units from sequences, - e.g., all syllables segmented out of a dataset of birdsong. + """Prepare a dataset of segments. + + Finds segments with a segmenting algorithm, + then computes a spectrogram for each segment + and saves in npy files. + Finally, assigns each npy file to a split + and moves files into split directories + inside the directory representing the dataset. Parameters ---------- @@ -452,12 +458,12 @@ def prep_unit_dataset( Returns ------- - unit_df : pandas.DataFrame - A DataFrame representing all the units in the dataset. + segment_df : pandas.DataFrame + A DataFrame representing all the segments in the dataset. shape: tuple A tuple representing the shape of all spectrograms in the dataset. - The spectrograms of all units are padded so that they are all - as wide as the widest unit (i.e, the one with the longest duration). + The spectrograms of all segments are padded so that they are all + as wide as the widest segment (i.e, the one with the longest duration). 
""" # pre-conditions --------------------------------------------------------------------------------------------------- if audio_format not in constants.VALID_AUDIO_FORMATS: @@ -609,6 +615,6 @@ def prep_unit_dataset( [path, *record[1:]] ) ) - unit_df = pd.DataFrame.from_records(new_records, columns=DF_COLUMNS) + segment_df = pd.DataFrame.from_records(new_records, columns=DF_COLUMNS) - return unit_df, shape + return segment_df, shape diff --git a/src/vak/prep/unit_dataset/__init__.py b/src/vak/prep/unit_dataset/__init__.py deleted file mode 100644 index bf68aa74b..000000000 --- a/src/vak/prep/unit_dataset/__init__.py +++ /dev/null @@ -1,4 +0,0 @@ -from . import unit_dataset -from .unit_dataset import prep_unit_dataset - -__all__ = ["prep_unit_dataset", "unit_dataset"] diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 1aedf7fad..4b83fa68c 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -10,7 +10,7 @@ from ...common import labels from ...config.spect_params import SpectParamsConfig from .. import dataset_df_helper, split -from ..unit_dataset import prep_unit_dataset +from ..segment_dataset import prep_segment_dataset from ..parametric_umap import dataset_arrays @@ -116,7 +116,7 @@ def prep_segment_vae_dataset( ------- """ - dataset_df, shape = prep_unit_dataset( + dataset_df, shape = prep_segment_dataset( audio_format, dataset_path, spect_params, @@ -130,7 +130,7 @@ def prep_segment_vae_dataset( ) if dataset_df.empty: raise ValueError( - "Calling `vak.prep.unit_dataset.prep_unit_dataset` " + "Calling `vak.prep.segment_dataset.prep_segment_dataset` " "with arguments passed to `vak.core.prep.vae.prep_segment_vae_dataset` " "returned an empty dataframe.\n" "Please double-check arguments to `vak.core.prep` function." 
From a546eebbdddbeef5c0bc5d3544d0957d0eb848ca Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 09:51:33 -0500 Subject: [PATCH 132/150] Fix up rename 'unit_dataset' -> 'segment_dataset' in prep/ --- src/vak/prep/parametric_umap/dataset_arrays.py | 2 +- src/vak/prep/parametric_umap/parametric_umap.py | 2 +- src/vak/prep/prep_.py | 2 +- src/vak/prep/segment_dataset/segment_dataset.py | 8 ++++---- src/vak/prep/split/__init__.py | 4 ++-- src/vak/prep/split/split.py | 4 ++-- src/vak/prep/vae/segment_vae.py | 6 +++--- src/vak/prep/vae/vae.py | 2 +- 8 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/vak/prep/parametric_umap/dataset_arrays.py b/src/vak/prep/parametric_umap/dataset_arrays.py index bd9b3297a..2f87c0f55 100644 --- a/src/vak/prep/parametric_umap/dataset_arrays.py +++ b/src/vak/prep/parametric_umap/dataset_arrays.py @@ -26,7 +26,7 @@ def move_files_into_split_subdirs( A ``pandas.DataFrame`` returned by :func:`vak.prep.segment_dataset.prep_segment_dataset` with a ``'split'`` column added, as a result of calling - :func:`vak.prep.split.unit_dataframe` or because it was added "manually" + :func:`vak.prep.split.segment_dataframe` or because it was added "manually" by calling :func:`vak.core.prep.prep_helper.add_split_col` (as is done for 'predict' when the entire ``DataFrame`` belongs to this "split"). 
diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 83b3ac03e..224a7c366 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -268,7 +268,7 @@ def prep_parametric_umap_dataset( do_split = True if do_split: - dataset_df = split.unit_dataframe( + dataset_df = split.segment_dataframe( dataset_df, dataset_path, labelset=labelset, diff --git a/src/vak/prep/prep_.py b/src/vak/prep/prep_.py index 14e729822..383401e7a 100644 --- a/src/vak/prep/prep_.py +++ b/src/vak/prep/prep_.py @@ -164,7 +164,7 @@ def prep( target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). - Spectrograms of units will be reshaped + Spectrograms of segments will be reshaped by interpolation to have the specified number of frequency and time bins. The transformation is only applied if both this diff --git a/src/vak/prep/segment_dataset/segment_dataset.py b/src/vak/prep/segment_dataset/segment_dataset.py index e84f2945c..98d18e857 100644 --- a/src/vak/prep/segment_dataset/segment_dataset.py +++ b/src/vak/prep/segment_dataset/segment_dataset.py @@ -70,7 +70,7 @@ def get_segment_list( audio_format : str String representing audio file format, e.g. 'wav'. context_s : float - Number of seconds of "context" around unit to + Number of seconds of "context" around segment to add, i.e., time before and after the onset and offset respectively. Default is 0.005s, 5 milliseconds. @@ -333,7 +333,7 @@ def interp_spectrogram( target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). - Spectrograms of units will be reshaped + Spectrograms of segments will be reshaped by interpolation to have the specified number of frequency and time bins. The transformation is only applied if both this @@ -436,7 +436,7 @@ def prep_segment_dataset( :func:`vak.converters.labelset_to_set`. 
See help for that function for details on how to specify ``labelset``. context_s : float - Number of seconds of "context" around unit to + Number of seconds of "context" around segment to add, i.e., time before and after the onset and offset respectively. Default is 0.005s, 5 milliseconds. @@ -449,7 +449,7 @@ def prep_segment_dataset( target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). - Spectrograms of units will be reshaped + Spectrograms of segments will be reshaped by interpolation to have the specified number of frequency and time bins. The transformation is only applied if both this diff --git a/src/vak/prep/split/__init__.py b/src/vak/prep/split/__init__.py index e8c9a001e..c3114279b 100644 --- a/src/vak/prep/split/__init__.py +++ b/src/vak/prep/split/__init__.py @@ -1,8 +1,8 @@ from . import algorithms -from .split import frame_classification_dataframe, unit_dataframe +from .split import frame_classification_dataframe, segment_dataframe __all__ = [ "algorithms", "frame_classification_dataframe", - "unit_dataframe", + "segment_dataframe", ] diff --git a/src/vak/prep/split/split.py b/src/vak/prep/split/split.py index 23d37dd49..932cfe5bc 100644 --- a/src/vak/prep/split/split.py +++ b/src/vak/prep/split/split.py @@ -178,7 +178,7 @@ def frame_classification_dataframe( return dataset_df -def unit_dataframe( +def segment_dataframe( dataset_df: pd.DataFrame, dataset_path: str | pathlib.Path, labelset: set, @@ -187,7 +187,7 @@ def unit_dataframe( val_dur: float | None = None, ): """Create datasets splits from a dataframe - representing a unit dataset. + representing a segment dataset. Splits dataset into training, test, and (optionally) validation subsets, specified by their duration. 
diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 4b83fa68c..12a19894b 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -69,7 +69,7 @@ def prep_segment_vae_dataset( :func:`vak.converters.labelset_to_set`. See help for that function for details on how to specify ``labelset``. context_s : float - Number of seconds of "context" around unit to + Number of seconds of "context" around segment to add, i.e., time before and after the onset and offset respectively. Default is 0.005s, 5 milliseconds. @@ -82,7 +82,7 @@ def prep_segment_vae_dataset( target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). - Spectrograms of units will be reshaped + Spectrograms of segments will be reshaped by interpolation to have the specified number of frequency and time bins. The transformation is only applied if both this @@ -174,7 +174,7 @@ def prep_segment_vae_dataset( do_split = True if do_split: - dataset_df = split.unit_dataframe( + dataset_df = split.segment_dataframe( dataset_df, dataset_path, labelset=labelset, diff --git a/src/vak/prep/vae/vae.py b/src/vak/prep/vae/vae.py index c43065e6e..ca77914dd 100644 --- a/src/vak/prep/vae/vae.py +++ b/src/vak/prep/vae/vae.py @@ -110,7 +110,7 @@ def prep_vae_dataset( target_shape : tuple Of ints, (target number of frequency bins, target number of time bins). - Spectrograms of units will be reshaped + Spectrograms of segments will be reshaped by interpolation to have the specified number of frequency and time bins. 
The transformation is only applied if both this From cfb23c49db81d3f0ecdcc893fc00340063347fd7 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 11:39:20 -0500 Subject: [PATCH 133/150] Rename prep/parametric_umap/dataset_arrays.py -> prep/segment_dataset/make_splits.py, remove unused parameter 'purpose' from (renamed) make_splits function --- .../make_splits.py} | 35 ++++++++----------- 1 file changed, 15 insertions(+), 20 deletions(-) rename src/vak/prep/{parametric_umap/dataset_arrays.py => segment_dataset/make_splits.py} (80%) diff --git a/src/vak/prep/parametric_umap/dataset_arrays.py b/src/vak/prep/segment_dataset/make_splits.py similarity index 80% rename from src/vak/prep/parametric_umap/dataset_arrays.py rename to src/vak/prep/segment_dataset/make_splits.py index 2f87c0f55..a88858e43 100644 --- a/src/vak/prep/parametric_umap/dataset_arrays.py +++ b/src/vak/prep/segment_dataset/make_splits.py @@ -12,8 +12,8 @@ logger = logging.getLogger(__name__) -def move_files_into_split_subdirs( - dataset_df: pd.DataFrame, dataset_path: pathlib.Path, purpose: str +def make_splits( + dataset_df: pd.DataFrame, dataset_path: pathlib.Path ) -> None: """Move npy files in dataset into sub-directories, one for each split in the dataset. @@ -25,18 +25,14 @@ def move_files_into_split_subdirs( dataset_df : pandas.DataFrame A ``pandas.DataFrame`` returned by :func:`vak.prep.segment_dataset.prep_segment_dataset` - with a ``'split'`` column added, as a result of calling - :func:`vak.prep.split.segment_dataframe` or because it was added "manually" - by calling :func:`vak.core.prep.prep_helper.add_split_col` (as is done - for 'predict' when the entire ``DataFrame`` belongs to this - "split"). + with a ``'split'`` column added. 
The ```split'`` is added + as a result of calling :func:`vak.prep.split.segment_dataframe`, + or because it was added "manually" + by calling :func:`vak.core.prep.prep_helper.add_split_col` + (as is done for 'predict' when the entire ``DataFrame`` + belongs to this "split"). dataset_path : pathlib.Path Path to directory that represents dataset. - purpose: str - A string indicating what the dataset will be used for. - One of {'train', 'eval', 'predict', 'learncurve'}. - Determined by :func:`vak.core.prep.prep` - using the TOML configuration file. Returns ------- @@ -104,11 +100,10 @@ def move_files_into_split_subdirs( dataset_df.loc[split_df.index, "spect_path"] = new_spect_paths # ---- clean up after moving/copying ------------------------------------------------------------------------------- - # remove any directories that we just emptied - if moved_spect_paths: - unique_parents = set( - [moved_spect.parent for moved_spect in moved_spect_paths] - ) - for parent in unique_parents: - if len(list(parent.iterdir())) < 1: - shutil.rmtree(parent) + # Remove any npy files that were *not* added to a split + npy_files_not_in_split = sorted( + dataset_path.glob(f"*npy") + ) + if len(npy_files_not_in_split) > 0: + for npy_file in npy_files_not_in_split: + npy_file.unlink() From 7f70781309dac7c4dcc29da261f6681d77a15926 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 11:39:58 -0500 Subject: [PATCH 134/150] Import make_splits function in src/vak/prep/segment_dataset/__init__.py --- src/vak/prep/segment_dataset/__init__.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/vak/prep/segment_dataset/__init__.py b/src/vak/prep/segment_dataset/__init__.py index 2ba4ab7f5..ca663ee4f 100644 --- a/src/vak/prep/segment_dataset/__init__.py +++ b/src/vak/prep/segment_dataset/__init__.py @@ -1,4 +1,5 @@ from . 
import segment_dataset +from .make_splits import make_splits from .segment_dataset import prep_segment_dataset -__all__ = ["prep_segment_dataset", "segment_dataset"] +__all__ = ["make_splits", "prep_segment_dataset", "segment_dataset"] From e5361ce3954a67a7db2e2b02a0adf82d880f6f12 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 11:40:15 -0500 Subject: [PATCH 135/150] Use segment_dataset.make_splits in src/vak/prep/parametric_umap/parametric_umap.py --- src/vak/prep/parametric_umap/parametric_umap.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 224a7c366..4d2a595ae 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -14,8 +14,7 @@ from ...common.logging import config_logging_for_cli, log_version from ...common.timenow import get_timenow_as_str from .. import dataset_df_helper, split -from ..segment_dataset import prep_segment_dataset -from . 
import dataset_arrays +from ..segment_dataset import prep_segment_dataset, make_splits logger = logging.getLogger(__name__) @@ -308,7 +307,7 @@ def prep_parametric_umap_dataset( labelmap = None # ---- make arrays that represent final dataset -------------------------------------------------------------------- - dataset_arrays.move_files_into_split_subdirs( + make_splits( dataset_df, dataset_path, purpose, From dfbcba778df8ffb23a4ee5c990a38bf18f0242e6 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 11:40:47 -0500 Subject: [PATCH 136/150] Use segment_dataset.make_splits function in src/vak/prep/vae/segment_vae.py --- src/vak/prep/vae/segment_vae.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/vak/prep/vae/segment_vae.py b/src/vak/prep/vae/segment_vae.py index 12a19894b..56064608d 100644 --- a/src/vak/prep/vae/segment_vae.py +++ b/src/vak/prep/vae/segment_vae.py @@ -10,8 +10,7 @@ from ...common import labels from ...config.spect_params import SpectParamsConfig from .. 
import dataset_df_helper, split -from ..segment_dataset import prep_segment_dataset -from ..parametric_umap import dataset_arrays +from ..segment_dataset import prep_segment_dataset, make_splits logger = logging.getLogger(__name__) @@ -212,10 +211,9 @@ def prep_segment_vae_dataset( json.dump(labelmap, fp) # ---- make arrays that represent final dataset -------------------------------------------------------------------- - dataset_arrays.move_files_into_split_subdirs( + make_splits( dataset_df, dataset_path, - purpose, ) # # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- From c001b2408d66881a0e5e6e2e4f40b7da9e2bef06 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Thu, 11 Jan 2024 11:40:57 -0500 Subject: [PATCH 137/150] Fixup use make_splits in src/vak/prep/parametric_umap/parametric_umap.py --- src/vak/prep/parametric_umap/parametric_umap.py | 1 - 1 file changed, 1 deletion(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index 4d2a595ae..a3402dd9b 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -310,7 +310,6 @@ def prep_parametric_umap_dataset( make_splits( dataset_df, dataset_path, - purpose, ) # # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- From c61a881c2f2a119bfe5c3aa9e5da3b1202d2d114 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 138/150] Add src/vak/prep/segment_dataset/learncurve.py --- src/vak/prep/segment_dataset/learncurve.py | 124 +++++++++++++++++++++ 1 file changed, 124 insertions(+) create mode 100644 src/vak/prep/segment_dataset/learncurve.py diff --git a/src/vak/prep/segment_dataset/learncurve.py b/src/vak/prep/segment_dataset/learncurve.py new file mode 100644 index 000000000..601fe6c02 --- /dev/null +++ 
b/src/vak/prep/segment_dataset/learncurve.py @@ -0,0 +1,124 @@ +"""Functionality to prepare subsets of the 'train' split of segment datasets, +for generating a learning curve.""" +from __future__ import annotations + +import logging +import pathlib +from typing import Sequence + +import attrs +import dask.bag as db +import numpy as np +import pandas as pd +from dask.diagnostics import ProgressBar + +from ... import common, datasets +from .. import split + +logger = logging.getLogger(__name__) + + +def make_subsets_from_dataset_df( + dataset_df: pd.DataFrame, + train_set_durs: Sequence[float], + num_replicates: int, + dataset_path: pathlib.Path, + labelmap: dict, +) -> pd.DataFrame: + """Make subsets of the training data split for a learning curve. + + Makes subsets given a dataframe representing the entire dataset, + with one subset for each combination of (training set duration, + replicate number). Each subset is randomly drawn + from the total training split. + + Uses :func:`vak.prep.split.segment_dataset` to make + subsets of the training data from ``dataset_df``. + + A new column will be added to the dataframe, `'subset'`, + and additional rows for each subset. + The dataframe is returned with these subsets added. + (The `'split'` for these rows will still be `'train'`.) + + Parameters + ---------- + dataset_df : pandas.DataFrame + Dataframe representing a dataset for frame classification models. + It is returned by + :func:`vak.prep.segment_dataset.prep_segment_dataset`, + and has a ``'split'`` column added. + train_set_durs : list + Durations in seconds of subsets taken from training data + to create a learning curve, e.g., `[5., 10., 15., 20.]`. + num_replicates : int + number of times to replicate training for each training set duration + to better estimate metrics for a training set of that size. + Each replicate uses a different randomly drawn subset of the training + data (but of the same duration). 
+ dataset_path : str, pathlib.Path + Directory where splits will be saved. + + Returns + ------- + dataset_df_out : pandas.DataFrame + A pandas.DataFrame that has the original splits + from ``dataset_df``, as well as the additional subsets + of the training data added, along with additional + columns, ``'subset', 'train_dur', 'replicate_num'``, + that are used by :mod:`vak`. + Other functions like :func:`vak.learncurve.learncurve` + specify a specific subset of the training data + by getting the subset name with the function + :func:`vak.common.learncurve.get_train_dur_replicate_split_name`, + and then filtering ``dataset_df_out`` with that name + using the 'subset' column. + """ + dataset_path = pathlib.Path(dataset_path) + + # get just train split, to pass to split.dataframe + # so we don't end up with other splits in the training set + train_split_df = dataset_df[dataset_df["split"] == "train"].copy() + labelset = set([k for k in labelmap.keys() if k != "unlabeled"]) + + # will concat after loop, then use ``csv_path`` to replace + # original dataset df with this one + subsets_df = [] + for train_dur in train_set_durs: + logger.info( + f"Subsetting training set for training set of duration: {train_dur}", + ) + for replicate_num in range(1, num_replicates + 1): + train_dur_replicate_subset_name = ( + common.learncurve.get_train_dur_replicate_subset_name( + train_dur, replicate_num + ) + ) + + train_dur_replicate_df = split.segment_dataframe( + # copy to avoid mutating original train_split_df + train_split_df.copy(), + dataset_path, + train_dur=train_dur, + labelset=labelset, + ) + # remove rows where split set to 'None' + train_dur_replicate_df = train_dur_replicate_df[ + train_dur_replicate_df.split == "train" + ] + # next line, make split name in csv match the split name used for directory in dataset dir + train_dur_replicate_df["subset"] = train_dur_replicate_subset_name + train_dur_replicate_df["train_dur"] = train_dur + train_dur_replicate_df["replicate_num"] 
= replicate_num + subsets_df.append(train_dur_replicate_df) + + subsets_df = pd.concat(subsets_df) + + # keep the same validation, test, and total train sets by concatenating them with the train subsets + dataset_df["subset"] = None # add column but have it be empty + dataset_df = pd.concat((subsets_df, dataset_df)) + # We reset the entire index across all splits, instead of repeating indices, + # and we set drop=False because we don't want to add a new column 'index' or 'level_0'. + # Need to do this again after calling `make_npy_files_for_each_split` since we just + # did `pd.concat` with the original dataframe + dataset_df = dataset_df.reset_index(drop=True) + return dataset_df From b2e166af17cca62a240f37d0eadf8211a1f83d56 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 139/150] Use segment_dataset.learncurve.make_subsets_from_dataset_df in src/vak/prep/parametric_umap/parametric_umap.py --- .../prep/parametric_umap/parametric_umap.py | 26 +++++++++---------- 1 file changed, 12 insertions(+), 14 deletions(-) diff --git a/src/vak/prep/parametric_umap/parametric_umap.py b/src/vak/prep/parametric_umap/parametric_umap.py index a3402dd9b..bec2835d6 100644 --- a/src/vak/prep/parametric_umap/parametric_umap.py +++ b/src/vak/prep/parametric_umap/parametric_umap.py @@ -14,7 +14,8 @@ from ...common.logging import config_logging_for_cli, log_version from ...common.timenow import get_timenow_as_str from .. 
import dataset_df_helper, split -from ..segment_dataset import prep_segment_dataset, make_splits +from ..segment_dataset import learncurve, make_splits, prep_segment_dataset + logger = logging.getLogger(__name__) @@ -311,19 +312,16 @@ def prep_parametric_umap_dataset( dataset_df, dataset_path, ) - # - # ---- if purpose is learncurve, additionally prep splits for that ----------------------------------------------- - # if purpose == 'learncurve': - # dataset_df = make_learncurve_splits_from_dataset_df( - # dataset_df, - # train_set_durs, - # num_replicates, - # dataset_path, - # labelmap, - # audio_format, - # spect_key, - # timebins_key, - # ) + + # ---- if purpose is learncurve, additionally prep splits for that ------------------------------------------------- + if purpose == 'learncurve': + dataset_df = learncurve.make_subsets_from_dataset_df( + dataset_df, + train_set_durs, + num_replicates, + dataset_path, + labelmap, + ) # ---- save csv file that captures provenance of source data ------------------------------------------------------- logger.info(f"Saving dataset csv file: {dataset_csv_path}") From 73108a9ecd865e9fbdfc01f36d266bf45e3365ba Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 140/150] Remove unused imports in src/vak/prep/segment_dataset/learncurve.py --- src/vak/prep/segment_dataset/learncurve.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/src/vak/prep/segment_dataset/learncurve.py b/src/vak/prep/segment_dataset/learncurve.py index 601fe6c02..9bdc97bfb 100644 --- a/src/vak/prep/segment_dataset/learncurve.py +++ b/src/vak/prep/segment_dataset/learncurve.py @@ -6,13 +6,9 @@ import pathlib from typing import Sequence -import attrs -import dask.bag as db -import numpy as np import pandas as pd -from dask.diagnostics import ProgressBar -from ... import common, datasets +from ... import common from .. 
import split logger = logging.getLogger(__name__) From 575ff6ea596b030750bb5f3f2e9c488de9c2ae0a Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 141/150] Import learncurve in src/vak/prep/segment_dataset/__init__.py --- src/vak/prep/segment_dataset/__init__.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/vak/prep/segment_dataset/__init__.py b/src/vak/prep/segment_dataset/__init__.py index ca663ee4f..22fc64d54 100644 --- a/src/vak/prep/segment_dataset/__init__.py +++ b/src/vak/prep/segment_dataset/__init__.py @@ -1,5 +1,5 @@ -from . import segment_dataset +from . import learncurve, segment_dataset from .make_splits import make_splits from .segment_dataset import prep_segment_dataset -__all__ = ["make_splits", "prep_segment_dataset", "segment_dataset"] +__all__ = ["learncurve", "make_splits", "prep_segment_dataset", "segment_dataset"] From 00cabc10360e858327c18ee4f7bf180785f655cb Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 142/150] Add newline in src/vak/prep/segment_dataset/learncurve.py --- src/vak/prep/segment_dataset/learncurve.py | 1 + 1 file changed, 1 insertion(+) diff --git a/src/vak/prep/segment_dataset/learncurve.py b/src/vak/prep/segment_dataset/learncurve.py index 9bdc97bfb..be530b8c0 100644 --- a/src/vak/prep/segment_dataset/learncurve.py +++ b/src/vak/prep/segment_dataset/learncurve.py @@ -11,6 +11,7 @@ from ... import common from .. 
import split + logger = logging.getLogger(__name__) From 8f3a609522ee884aa59a4fc72adb01c273233727 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 143/150] WIP: Add tests/test_prep/test_segment_dataset/ --- .../test_segment_dataset/__init__.py | 0 .../test_segment_dataset/test_learncurve.py | 2 + .../test_segment_dataset/test_make_splits.py | 2 + .../test_segment_dataset.py | 41 +++++++++++++++++++ 4 files changed, 45 insertions(+) create mode 100644 tests/test_prep/test_segment_dataset/__init__.py create mode 100644 tests/test_prep/test_segment_dataset/test_learncurve.py create mode 100644 tests/test_prep/test_segment_dataset/test_make_splits.py create mode 100644 tests/test_prep/test_segment_dataset/test_segment_dataset.py diff --git a/tests/test_prep/test_segment_dataset/__init__.py b/tests/test_prep/test_segment_dataset/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/tests/test_prep/test_segment_dataset/test_learncurve.py b/tests/test_prep/test_segment_dataset/test_learncurve.py new file mode 100644 index 000000000..10d571190 --- /dev/null +++ b/tests/test_prep/test_segment_dataset/test_learncurve.py @@ -0,0 +1,2 @@ +def test_make_subsets_from_dataset_df(): + assert False diff --git a/tests/test_prep/test_segment_dataset/test_make_splits.py b/tests/test_prep/test_segment_dataset/test_make_splits.py new file mode 100644 index 000000000..8bd452381 --- /dev/null +++ b/tests/test_prep/test_segment_dataset/test_make_splits.py @@ -0,0 +1,2 @@ +def test_make_splits(): + assert False diff --git a/tests/test_prep/test_segment_dataset/test_segment_dataset.py b/tests/test_prep/test_segment_dataset/test_segment_dataset.py new file mode 100644 index 000000000..3f681ad63 --- /dev/null +++ b/tests/test_prep/test_segment_dataset/test_segment_dataset.py @@ -0,0 +1,41 @@ +class TestSegment: + def test_init(self): + assert False + + +def test_get_segment_list(): + assert False + + +def 
test_spectrogram_from_segment(): + # TODO: mock calling spectrogram + assert False + + +class TestSpectToSave: + def test_init(self): + assert False + + +def test_save_spect(): + assert False + + +def test_abspath(): + assert False + + +def test_make_spect_return_record(): + assert False + + +def test_pad_spectrogram(): + assert False + + +def test_interp_spectrogram(): + assert False + + +def test_prep_segment_dataset(): + assert False From e640ef451eeabddc078bbf3254f0e92280fb86de Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 144/150] Fix imports in src/vak/prep/parametric_umap/__init__.py --- src/vak/prep/parametric_umap/__init__.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/vak/prep/parametric_umap/__init__.py b/src/vak/prep/parametric_umap/__init__.py index fb80f20ef..30427dabb 100644 --- a/src/vak/prep/parametric_umap/__init__.py +++ b/src/vak/prep/parametric_umap/__init__.py @@ -1,7 +1,5 @@ -from . import dataset_arrays from .parametric_umap import prep_parametric_umap_dataset __all__ = [ - "dataset_arrays", "prep_parametric_umap_dataset", ] From a4272aab886ba97e917c0ab8ecc7cfe062aa0f54 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 145/150] Add ref to AVA code in src/vak/models/ava.py --- src/vak/models/ava.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/vak/models/ava.py b/src/vak/models/ava.py index d7a5ce976..70f21537d 100644 --- a/src/vak/models/ava.py +++ b/src/vak/models/ava.py @@ -1,8 +1,11 @@ """Autoencoded Vocal Analysis (AVA) model [1]_. +Code is adapted from [2]_. .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + +.. 
[2] https://github.com/pearsonlab/autoencoded-vocal-analysis """ from __future__ import annotations From 9e608e92338b6b03e784a54b991ba4cf36692af9 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 146/150] WIP: Revise docstrings in src/vak/nets/ava.py --- src/vak/nets/ava.py | 141 ++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 130 insertions(+), 11 deletions(-) diff --git a/src/vak/nets/ava.py b/src/vak/nets/ava.py index 88565eeea..b6bbfbee1 100644 --- a/src/vak/nets/ava.py +++ b/src/vak/nets/ava.py @@ -1,3 +1,12 @@ +"""AVA variational autoencoder, as described in [1]_. +Code is adapted from [2]_. + +.. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + +.. [2] https://github.com/pearsonlab/autoencoded-vocal-analysis +""" from __future__ import annotations from typing import Sequence @@ -9,6 +18,11 @@ class FullyConnectedLayers(nn.Module): + """Module containing two fully-connected layers. + + This module is used to parametrize :math:`\mu` + and :math:`\Sigma` in AVA. + """ def __init__(self, n_features: Sequence[int]): super().__init__() self.layer = nn.Sequential( @@ -21,8 +35,33 @@ def forward(self, x): class AVA(nn.Module): - """ + """AVA variational autoencoder, as described in [1]_. + Code is adapted from [2]_. + + Attributes + ---------- + input_shape + in_channels + x_shape + x_dim + encoder + fc_view + in_fc_dims + shared_encoder_fc + mu_fc + cov_factor_fc + cov_diag_fc + decoder_fc + decoder + + + References + ---------- + .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + .. 
[2] https://github.com/pearsonlab/autoencoded-vocal-analysis """ def __init__( self, @@ -31,14 +70,34 @@ def __init__( fc_dims: Sequence[int] = (1024, 256, 64), z_dim: int = 32, ): - """ + """Initalize a new instance of + an AVA variational autoencoder. Parameters ---------- - input_shape - encoder_channels - fc_dims - z_dim + input_shape : Sequence + Shape of input to network, a fixed size + for all spectrograms. + Tuple/list of integers, with dimensions + (channels, frequency bins, time bins). + Default is ``(1, 128, 128)``. + encoder_channels : Sequence + Number of channels in convolutional layers + of encoder. Tuple/list of integers. + Default is ``(8, 8, 16, 16, 24, 24, 32)``. + fc_dims : Sequence + Dimensionality of fully-connected layers. + Tuple/list of integers. + These values are used for the linear layers + in the encoder (``self.shared_encoder_fc``) + after passing through the convolutional layers, + as well as the linear layers + that are used to parametrize :math:`\mu` and + :math:`\Sigma`. + Default is (1024, 256, 64). + z_dim : int + Dimensionality of latent space. + Default is 32. """ super().__init__() @@ -125,7 +184,18 @@ def __init__( self.decoder = nn.Sequential(*modules) def encode(self, x): - """ + """Encode a spectrogram ``x`` + by mapping it to a vector :math:`z` + in latent space. + + Parameters + ---------- + x : torch.Tensor + + Returns + ------- + z : torch.Tensor + latent_dist : torch.Tensor """ x = self.encoder(x) x = torch.flatten(x, start_dim=1) @@ -137,19 +207,68 @@ def encode(self, x): return z, latent_dist def decode(self, z): + """Decode a latent space vector ``z``, + mapping it back to a spectrogram :math:`x` + in the space of spectrograms :math:`\mathcal{X}`. + + Parameters + ---------- + z : torch.Tensor + Output of encoder, with dimensions + (batch size, latent space size). + + Returns + ------- + x : torch.Tensor + Output of decoder, with shape + (batch, channel, frequency bins, time bins). 
""" - """ - z = self.decoder_fc(z).view(-1, *self.fc_view) - z = self.decoder(z).view(-1, *self.input_shape) - return z + x = self.decoder_fc(z).view(-1, *self.fc_view) + x = self.decoder(x).view(-1, *self.input_shape) + return x @staticmethod def reparametrize(mu, cov_factor, cov_diag): + """Sample a latent distribution + to get the latent embedding :math:`z`. + + Method that encapsulates the reparametrization trick. + + Parameters + ---------- + mu : torch.Tensor + cov_factor : torch.Tensor + cov_diag : torch.Tensor + + Returns + ------- + z : torch.Tensor + latent_dist : LowRankMultivariateNormal + """ latent_dist = LowRankMultivariateNormal(mu, cov_factor, cov_diag) z = latent_dist.rsample() return z, latent_dist def forward(self, x): + """Pass a spectrogram ``x`` + through the variational autoencoder: + encode, then decode. + + Parameters + ---------- + x : torch.Tensor + + Returns + ------- + x_rec : torch.Tensor + Reconstruction of ``x``, + output of the decoder. + z : torch.Tensor + Latent space embedding of ``x``. + latent_dist : LowRankMultivariateNormal + Distribution parametrized + by the output of the encoder. + """ z, latent_dist = self.encode(x) x_rec = self.decode(z) return x_rec, z, latent_dist From 5550cb840892728c7cccb41045d3d184325da106 Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 19 Jan 2024 13:09:13 -0500 Subject: [PATCH 147/150] WIP: Revise docstrings in src/vak/nn/loss/vae.py --- src/vak/nn/loss/vae.py | 79 +++++++++++++++++++++++++++++++++++++++--- 1 file changed, 75 insertions(+), 4 deletions(-) diff --git a/src/vak/nn/loss/vae.py b/src/vak/nn/loss/vae.py index 9270763e1..7ad7c826c 100644 --- a/src/vak/nn/loss/vae.py +++ b/src/vak/nn/loss/vae.py @@ -1,3 +1,14 @@ +"""Evidence Lower Bound (ELBO) loss for a Variational Auto-Encpoder, +as used with the Autoencoded Vocal Analysis (AVA) model [1]_. +Code is adapted from [2]_. + +.. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). 
+ Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + +.. [2] https://github.com/pearsonlab/autoencoded-vocal-analysis +""" + from __future__ import annotations import math @@ -12,10 +23,39 @@ def vae_elbo_loss( x: torch.Tensor, z: torch.Tensor, x_rec: torch.Tensor, - latent_dist: torch.Tensor, + latent_dist: torch.distributions.LowRankMultivariateNormal, model_precision: float, z_dim: int -): +) -> torch.Tensor: + """Evidence Lower Bound (ELBO) loss for a Variational Auto-Encpoder, + as used with the Autoencoded Vocal Analysis (AVA) model [1]_. + + Notes + ----- + Code is adapted from [2]_. + + References + ---------- + .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). + Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + + .. [2] https://github.com/pearsonlab/autoencoded-vocal-analysis + + Parameters + ---------- + x : torch.Tensor + z : torch.Tensor + x_rec : torch.Tensor + latent_dist + model_precision : float + z_dim : int + Dimensionality of latent space + + Returns + ------- + + """ # E_{q(z|x)} p(z) elbo = -0.5 * (torch.sum(torch.pow(z, 2) ) + z_dim * torch.log( 2 * PI )) @@ -37,8 +77,26 @@ def vae_elbo_loss( class VaeElboLoss(torch.nn.Module): - """""" + """Evidence Lower Bound (ELBO) loss for a Variational Auto-Encpoder, + as used with the Autoencoded Vocal Analysis (AVA) model [1]_. + + ELBO can be written as + :math:`L(\phi, \theta; x) = \text{ln} p_{\theta}(x) - D_{KL}(q_{\phi}(z|x) || p_{\theta}(z|x))` + where the first term is the *evidence* for :math:`x` + and the second is the Kullback-Leibler divergence between + :math:`q_{\phi}` and :math:`p_{\theta}`. + + Notes + ----- + Code is adapted from [2]_. + References + ---------- + .. [1] Goffinet, J., Brudner, S., Mooney, R., & Pearson, J. (2021). 
+ Low-dimensional learned feature spaces quantify individual and group differences in vocal repertoires. + eLife, 10:e67855. https://doi.org/10.7554/eLife.67855 + .. [2] https://github.com/pearsonlab/autoencoded-vocal-analysis + """ def __init__( self, model_precision: float = 10.0, @@ -53,8 +111,21 @@ def forward( x: torch.Tensor, z: torch.Tensor, x_rec: torch.Tensor, - latent_dist: torch.Tensor, + latent_dist: torch.distributions.LowRankMultivariateNormal, ): + """Compute ELBO loss + + Parameters + ---------- + x + z + x_rec + latent_dist + + Returns + ------- + + """ return vae_elbo_loss( x=x, z=z, x_rec=x_rec, latent_dist=latent_dist, model_precision=self.model_precision, From 3b8f98a2b0cab122a77df0da8b4f66aadda7bc4d Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 1 Mar 2024 08:48:58 -0500 Subject: [PATCH 148/150] Add AVA configs in tests/data_for_tests/configs --- ...ae_learncurve_audio_cbin_annot_notmat.toml | 38 ++++++++++++++++++ ...ent_vae_train_audio_cbin_annot_notmat.toml | 37 ++++++++++++++++++ ...dow_vae_train_audio_cbin_annot_notmat.toml | 39 +++++++++++++++++++ 3 files changed, 114 insertions(+) create mode 100644 tests/data_for_tests/configs/AVA_segment_vae_learncurve_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/AVA_segment_vae_train_audio_cbin_annot_notmat.toml create mode 100644 tests/data_for_tests/configs/AVA_window_vae_train_audio_cbin_annot_notmat.toml diff --git a/tests/data_for_tests/configs/AVA_segment_vae_learncurve_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/AVA_segment_vae_learncurve_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..ac6d86a77 --- /dev/null +++ b/tests/data_for_tests/configs/AVA_segment_vae_learncurve_audio_cbin_annot_notmat.toml @@ -0,0 +1,38 @@ +[PREP] +dataset_type = "vae-segment" +input_type = "spect" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = 
"./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/AVA_segment_vae" +audio_format = "cbin" +annot_format = "notmat" +labelset = "iabcdefghjk" +train_dur = 20 +val_dur = 5 +test_dur = 10 +context_s = 0.01 +max_dur = 0.2 +target_shape = [ 128, 128,] +train_set_durs = [ 4, 6,] +num_replicates = 2 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 256 +transform_type = "log_spect" +freq_cutoffs = [ 400, 10000,] +normalize = false +min_val = -6.0 +max_val = 0.0 + +[LEARNCURVE] +model = "AVA" +batch_size = 64 +num_epochs = 150 +val_step = 500 +ckpt_step = 1000 +num_workers = 16 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/AVA" + +[AVA.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/AVA_segment_vae_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/AVA_segment_vae_train_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..52414440b --- /dev/null +++ b/tests/data_for_tests/configs/AVA_segment_vae_train_audio_cbin_annot_notmat.toml @@ -0,0 +1,37 @@ +[PREP] +dataset_type = "vae-segment" +input_type = "spect" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/AVA_segment_vae" +audio_format = "cbin" +annot_format = "notmat" +labelset = "iabcdefghjk" +train_dur = 20 +val_dur = 5 +test_dur = 10 +context_s = 0.01 +max_dur = 0.2 +target_shape = [ 128, 128,] + +[SPECT_PARAMS] +fft_size = 512 +step_size = 256 +transform_type = "log_spect" +freq_cutoffs = [ 400, 10000,] +normalize = false +min_val = -6.0 +max_val = 0.0 + +[TRAIN] +model = "AVA" +batch_size = 64 +num_epochs = 150 +val_step = 500 +ckpt_step = 1000 +num_workers = 16 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/AVA" +dataset_path = 
"tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/AVA_segment_vae/032312-vak-vae-dataset-generated-240111_222325" + +[AVA.optimizer] +lr = 0.001 diff --git a/tests/data_for_tests/configs/AVA_window_vae_train_audio_cbin_annot_notmat.toml b/tests/data_for_tests/configs/AVA_window_vae_train_audio_cbin_annot_notmat.toml new file mode 100644 index 000000000..7b308915f --- /dev/null +++ b/tests/data_for_tests/configs/AVA_window_vae_train_audio_cbin_annot_notmat.toml @@ -0,0 +1,39 @@ +[PREP] +dataset_type = "vae-window" +input_type = "spect" +data_dir = "./tests/data_for_tests/source/audio_cbin_annot_notmat/gy6or6/032312" +output_dir = "./tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/AVA_segment_vae" +audio_format = "cbin" +annot_format = "notmat" +labelset = "iabcdefghjk" +train_dur = 0.5 +val_dur = 0.2 +test_dur = 0.25 + +[SPECT_PARAMS] +fft_size = 512 +step_size = 32 +transform_type = "log_spect_plus_one" + +[TRAIN] +model = "AVA" +batch_size = 64 +num_epochs = 1 +val_step = 1 +ckpt_step = 1000 +num_workers = 16 +device = "cuda" +root_results_dir = "./tests/data_for_tests/generated/results/train/audio_cbin_annot_notmat/AVA" +dataset_path = "tests/data_for_tests/generated/prep/train/audio_cbin_annot_notmat/AVA_segment_vae/032312-vak-vae-dataset-generated-240103_115504" + +#[AVA.network] +#conv1_filters = 8 +#conv2_filters = 16 +#conv_kernel_size = 3 +#conv_stride = 2 +#conv_padding = 1 +#n_features_linear = 32 +#n_components = 2 + +[AVA.optimizer] +lr = 0.001 From 36a0a0dd006e63b2ed17a4f17fa37a9c68560e6e Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 12 Apr 2024 20:52:37 -0400 Subject: [PATCH 149/150] WIP: Add tests/test_models/test_vae.py --- tests/test_models/test_vae.py | 38 +++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) create mode 100644 tests/test_models/test_vae.py diff --git a/tests/test_models/test_vae.py b/tests/test_models/test_vae.py new file mode 100644 index 000000000..e56cd1fc9 --- 
/dev/null +++ b/tests/test_models/test_vae.py @@ -0,0 +1,38 @@ +import pytest + +import vak + + +class TestConvEncoderUMAP: + @pytest.mark.parametrize( + 'input_shape', + [ + (1, 32, 32), + (1, 64, 64), + ] + ) + def test_init(self, input_shape): + # TODO: actually write this test + assert False + network = { + 'encoder': vak.models.ConvEncoderUMAP.definition.network['encoder'](input_shape=input_shape) + } + model = vak.models.ConvEncoderUMAP(network=network) + assert isinstance(model, vak.models.ConvEncoderUMAP) + for attr in ('network', 'loss', 'optimizer'): + assert hasattr(model, attr) + attr_from_definition = getattr(vak.models.convencoder_umap.ConvEncoderUMAP.definition, attr) + if isinstance(attr_from_definition, dict): + attr_from_model = getattr(model, attr) + assert isinstance(attr_from_model, dict) + assert attr_from_model.keys() == attr_from_definition.keys() + for net_name, net_instance in attr_from_model.items(): + assert isinstance(net_instance, attr_from_definition[net_name]) + else: + assert isinstance(getattr(model, attr), + getattr(vak.models.convencoder_umap.ConvEncoderUMAP.definition, attr)) + assert hasattr(model, 'metrics') + assert isinstance(model.metrics, dict) + for metric_name, metric_callable in model.metrics.items(): + assert isinstance(metric_callable, + vak.models.convencoder_umap.ConvEncoderUMAP.definition.metrics[metric_name]) From d09d577ccef959c0eb306bde7cb1cf026b25d8df Mon Sep 17 00:00:00 2001 From: David Nicholson Date: Fri, 12 Apr 2024 20:54:57 -0400 Subject: [PATCH 150/150] WIP: Add tests/test_datasets/test_vae/ --- tests/test_datasets/test_vae/__init__.py | 0 tests/test_datasets/test_vae/test_vae.py | 4 ++++ 2 files changed, 4 insertions(+) create mode 100644 tests/test_datasets/test_vae/__init__.py create mode 100644 tests/test_datasets/test_vae/test_vae.py diff --git a/tests/test_datasets/test_vae/__init__.py b/tests/test_datasets/test_vae/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git 
a/tests/test_datasets/test_vae/test_vae.py b/tests/test_datasets/test_vae/test_vae.py new file mode 100644 index 000000000..751ce982a --- /dev/null +++ b/tests/test_datasets/test_vae/test_vae.py @@ -0,0 +1,4 @@ +class TestVAEDataset: + def __init__(self): + # TODO: write these tests + assert False