diff --git a/.github/workflows/ci-linux.yml b/.github/workflows/ci-linux.yml index 051e6f29f..54ee3029f 100644 --- a/.github/workflows/ci-linux.yml +++ b/.github/workflows/ci-linux.yml @@ -13,13 +13,15 @@ jobs: fail-fast: false matrix: python-version: [3.11, 3.12, 3.13] - runs-on: ubuntu-20.04 + runs-on: ubuntu-latest steps: - uses: actions/checkout@v2 - uses: actions/setup-python@v2 with: python-version: ${{ matrix.python-version }} - - uses: excitedleigh/setup-nox@v2.1.0 + - uses: wntrblm/nox@2024.04.15 + with: + python-version: ${{ matrix.python-version }} - name: run tests run: | nox -s test-data-download-source diff --git a/noxfile.py b/noxfile.py index 9245b5876..c65eb815d 100644 --- a/noxfile.py +++ b/noxfile.py @@ -182,7 +182,7 @@ def test_data_download_source(session) -> None: TEST_DATA_GENERATE_SCRIPT = './tests/scripts/generate_data_for_tests.py' -@nox.session(name='test-data-generate', python="3.10") +@nox.session(name='test-data-generate', python=TEST_PYTHONS[1]) def test_data_generate(session) -> None: """Produced 'generated' test data, by running TEST_DATA_GENERATE_SCRIPT on 'source' test data.""" session.install(".[test]") @@ -259,6 +259,9 @@ def test_data_download_generated_all(session) -> None: tf.extractall(path='.') session.log('Fixing paths in .csv files') session.install("pandas") + session.run( + "python", "./tests/scripts/fix_prep_csv_paths.py" + ) GENERATED_TEST_DATA_CI_URL = 'https://osf.io/un2zs/download' @@ -273,3 +276,8 @@ def test_data_download_generated_ci(session) -> None: session.log(f'Extracting downloaded tar: {GENERATED_TEST_DATA_CI_TAR}') with tarfile.open(GENERATED_TEST_DATA_CI_TAR, "r:gz") as tf: tf.extractall(path='.') + session.log('Fixing paths in .csv files') + session.install("pandas") + session.run( + "python", "./tests/scripts/fix_prep_csv_paths.py" + ) \ No newline at end of file diff --git a/src/vak/prep/frame_classification/source_files.py b/src/vak/prep/frame_classification/source_files.py index e5888db9f..0c9a6eba1 
"""This script gets run by continuous integration
(in .github/workflows/ci-{os}.yml files)
so that all the paths are correct on the virtual machines
"""
import argparse
from pathlib import Path

import pandas as pd

HERE = Path(__file__).parent
PROJ_ROOT = HERE / ".." / ".."
PROJ_ROOT_ABS = PROJ_ROOT.resolve()  # <- used to fix paths!!!
GENERATED_TEST_DATA = PROJ_ROOT / "tests" / "data_for_tests" / "generated"


def main(
    dry_run: bool = False,
    verbose: bool = False,
) -> None:
    """Load csv files created by ``prep`` and change the parent of paths
    so it's the local file system, instead of the machine where the test
    data was generated — gets tests to run on CI without FileNotFound errors.

    Parameters
    ----------
    dry_run : bool
        If True, re-root paths in memory but do not write the csv files back.
    verbose : bool
        If True, print each path before and after fixing.

    Raises
    ------
    ValueError
        If no recognized test-data root can be found inside a path.
    FileNotFoundError
        If a re-rooted path does not exist on the local file system.
    """
    prep_csvs = sorted(GENERATED_TEST_DATA.glob("**/*prep*csv"))
    for prep_csv in prep_csvs:
        print(
            f"Fixing paths in csv:\n{prep_csv}"
        )
        vak_df = pd.read_csv(prep_csv)
        for path_column_name in ("audio_path", "annot_path"):
            print(
                f"Fixing column: {path_column_name}"
            )
            paths = vak_df[path_column_name].values.tolist()
            paths = [str(path) for path in paths]
            new_column = []
            for path_str in paths:
                if verbose:
                    print(
                        f"path_str: {path_str}"
                    )
                # pd.read_csv loads empty cells as NaN; str() of those is
                # "nan" -- there is no path to fix, keep the value as-is
                if path_str == "nan":
                    new_column.append(path_str)
                    continue
                tests_root_ind = path_str.find('tests/data_for_tests')
                if tests_root_ind == -1:
                    tests_root_ind = path_str.find('../../data_for_tests')
                if tests_root_ind == -1:
                    if (
                        path_column_name == 'audio_path'
                        and 'spect_mat_annot_yarden' in str(prep_csv)
                    ):
                        # prep somehow gives root to audio -- from annotation?;
                        # we don't need these to exist though
                        new_column.append(path_str)
                        continue
                    raise ValueError(
                        f"Couldn't find tests_root_ind for path:\n{path_str}"
                    )
                # get rid of parent directories, then re-root on this machine
                new_path_str = path_str[tests_root_ind:]
                if verbose:
                    print(
                        f"new_path_str: {new_path_str}"
                    )
                new_path = PROJ_ROOT_ABS / new_path_str
                if not new_path.exists():
                    raise FileNotFoundError(
                        f"New path does not exist:\n{new_path}"
                    )
                new_column.append(str(new_path))
            vak_df[path_column_name] = new_column
        if not dry_run:
            # index=False: otherwise every run of this script appends another
            # unnamed index column to the csv (read_csv -> to_csv round-trip
            # is not idempotent without it)
            vak_df.to_csv(prep_csv, index=False)


if __name__ == "__main__":
    # guard the CLI driver so importing this module has no side effects
    # (previously parse_args() ran at import time and crashed under any
    # foreign argv, e.g. when imported by a test runner)
    parser = argparse.ArgumentParser()
    parser.add_argument(
        "--dry-run",
        action="store_true",
        default=False
    )
    parser.add_argument(
        "--verbose",
        action="store_true",
        default=False
    )
    args = parser.parse_args()

    main(
        dry_run=args.dry_run,
        verbose=args.verbose,
    )
by finding "tests/data_for_tests" and replacing the parent path before it +TEST_DATA_ROOT = (HERE / ".." / ".." / "data_for_tests").resolve() CONFIG_METADATA_JSON_PATH = TEST_DATA_ROOT / "configs" / "configs.json" with CONFIG_METADATA_JSON_PATH.open('r') as fp: CONFIG_METADATA_LIST = json.load(fp)['config_metadata']