Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions .github/workflows/ci-linux.yml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,15 @@ jobs:
fail-fast: false
matrix:
python-version: [3.11, 3.12, 3.13]
runs-on: ubuntu-20.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2
- uses: actions/setup-python@v2
with:
python-version: ${{ matrix.python-version }}
- uses: excitedleigh/[email protected]
- uses: wntrblm/[email protected]
with:
python-version: ${{ matrix.python-version }}
- name: run tests
run: |
nox -s test-data-download-source
Expand Down
10 changes: 9 additions & 1 deletion noxfile.py
Original file line number Diff line number Diff line change
Expand Up @@ -182,7 +182,7 @@ def test_data_download_source(session) -> None:
TEST_DATA_GENERATE_SCRIPT = './tests/scripts/generate_data_for_tests.py'


@nox.session(name='test-data-generate', python="3.10")
@nox.session(name='test-data-generate', python=TEST_PYTHONS[1])
def test_data_generate(session) -> None:
"""Produced 'generated' test data, by running TEST_DATA_GENERATE_SCRIPT on 'source' test data."""
session.install(".[test]")
Expand Down Expand Up @@ -259,6 +259,9 @@ def test_data_download_generated_all(session) -> None:
tf.extractall(path='.')
session.log('Fixing paths in .csv files')
session.install("pandas")
session.run(
"python", "./tests/scripts/fix_prep_csv_paths.py"
)


GENERATED_TEST_DATA_CI_URL = 'https://osf.io/un2zs/download'
Expand All @@ -273,3 +276,8 @@ def test_data_download_generated_ci(session) -> None:
session.log(f'Extracting downloaded tar: {GENERATED_TEST_DATA_CI_TAR}')
with tarfile.open(GENERATED_TEST_DATA_CI_TAR, "r:gz") as tf:
tf.extractall(path='.')
session.log('Fixing paths in .csv files')
session.install("pandas")
session.run(
"python", "./tests/scripts/fix_prep_csv_paths.py"
)
2 changes: 2 additions & 0 deletions src/vak/prep/frame_classification/source_files.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from __future__ import annotations

import logging
import pathlib

Expand Down
89 changes: 89 additions & 0 deletions tests/scripts/fix_prep_csv_paths.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
"""This script gets run by continuous integration
(in ./github/workflows/ci-{os}.yml files)
so that all the paths are correct on the virtual machines
"""
import argparse
from pathlib import Path

import pandas as pd

HERE = Path(__file__).parent
PROJ_ROOT = HERE / ".." / ".."
PROJ_ROOT_ABS = PROJ_ROOT.resolve() # <- used to fix paths!!!
GENERATED_TEST_DATA = PROJ_ROOT / "tests" / "data_for_tests" / "generated"


def main(
dry_run: bool = False,
verbose: bool = False,
):
"""loads csv files created by `prep` and changes the parent of paths so it's
the local file system, instead of what's on my laptop.`
To get tests to run on CI without FileNotFound errors"""
prep_csvs = sorted(GENERATED_TEST_DATA.glob("**/*prep*csv"))
for prep_csv in prep_csvs:
print(
f"Fixing paths in csv:\n{prep_csv}"
)
vak_df = pd.read_csv(prep_csv)
for path_column_name in ("audio_path", "annot_path"):
print(
f"Fixing column: {path_column_name}"
)
paths = vak_df[path_column_name].values.tolist()
paths = [str(path) for path in paths]
new_column = []
for path_str in paths:
if verbose:
print(
f"path_str: {path_str}"
)
if path_str == "nan":
new_column.append(path_str)
continue
tests_root_ind = path_str.find('tests/data_for_tests')
if tests_root_ind == -1:
tests_root_ind = path_str.find('../../data_for_tests')
if tests_root_ind == -1:
if (
path_column_name == 'audio_path'
and 'spect_mat_annot_yarden' in str(prep_csv)
):
# prep somehow gives root to audio -- from annotation?; we don't need these to exist though
new_column.append(path_str)
continue
raise ValueError(
f"Couldn't find tests_root_ind for path:\n{path_str}"
)
new_path_str = path_str[tests_root_ind:] # get rid of parent directories
if verbose:
print(
f"new_path_str: {new_path_str}"
)
new_path = PROJ_ROOT_ABS / new_path_str
if not new_path.exists():
raise FileNotFoundError(
f"New path does not exist:\n{new_path}"
)
new_column.append(str(new_path))
vak_df[path_column_name] = new_column
if not dry_run:
vak_df.to_csv(prep_csv)

if __name__ == "__main__":
    # Guard the CLI so importing this module (e.g. from a test) does not
    # parse sys.argv or rewrite csv files; previously argparse ran at import.
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False,
        help="validate and report path fixes without writing csv files back",
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False,
        help="print each path before and after fixing",
    )
    args = parser.parse_args()

    main(
        dry_run=args.dry_run,
        verbose=args.verbose,
    )
2 changes: 1 addition & 1 deletion tests/scripts/generate_data_for_tests.py
Original file line number Diff line number Diff line change
Expand Up @@ -145,7 +145,7 @@ def generate_test_data(
for config_metadata in command_config_metadata:
config_path = vaktestdata.constants.GENERATED_TEST_CONFIGS_ROOT / config_metadata.filename
logger.info(
f"n\Running 'vak {command}', using config: {config_path.name}"
f"\nRunning 'vak {command}', using config: {config_path.name}"
)
vak.cli.cli.cli(command, config_path)

Expand Down
4 changes: 3 additions & 1 deletion tests/scripts/vaktestdata/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,9 @@
from .config_metadata import ConfigMetadata

HERE = pathlib.Path(__file__).parent
TEST_DATA_ROOT = HERE / ".." / ".." / "data_for_tests"
# next line: we `resolve` so we don't end up with a weird path that uses ".." / ".." in the csv files representing data,
# which makes it hard to "re-root" paths, e.g. by finding "tests/data_for_tests" and replacing the parent path before it
TEST_DATA_ROOT = (HERE / ".." / ".." / "data_for_tests").resolve()
CONFIG_METADATA_JSON_PATH = TEST_DATA_ROOT / "configs" / "configs.json"
with CONFIG_METADATA_JSON_PATH.open('r') as fp:
CONFIG_METADATA_LIST = json.load(fp)['config_metadata']
Expand Down
Loading