working on scripts for launching container

QutEcoacoustics · Mar 20, 2024 · 1954f6f · 1954f6f
1 parent 211c97e
commit 1954f6f
Show file tree

Hide file tree

Showing 14 changed files with 235 additions and 86 deletions.
diff --git a/.gitignore b/.gitignore
@@ -1,12 +1,17 @@
 __pycache__
 
 tests/output/*
-
 !test/output/.gitkeep
 
+tests/input/*
+!tests/input/.gitkeep
+
 local_scripts/*
 
 *.ipynb_checkpoints/
 *.ipynb_checkpoints/*
 
-.vscode/launch.local.json
+.vscode/launch.local.json
+
+temp
+temp/*
diff --git a/Dockerfile b/Dockerfile
@@ -22,3 +22,4 @@ RUN pip install librosa numpy pytest pytest-mock
 
 RUN useradd -u 1000 -ms /bin/bash appuser
 
+ENV PYTHONPATH="/app:$PYTHONPATH"
diff --git a/pyproject_bak.toml b/pyproject_bak.toml
diff --git a/scripts/classify.ps1 b/scripts/classify.ps1
@@ -0,0 +1,61 @@
+# Runs `app.py classify` in a Docker container, mounting the source at /mnt/embeddings
+# and the output folder at /mnt/output. Usage: pwsh classify.ps1 <source> <output> <recognizer> [image]
+$source = $args[0]
+$output = $args[1]
+$recognizer = $args[2]
+$image = $args[3]
+if ($null -eq $image) { $image = "qutecoacoustics/perchrunner:latest" }
+
+# Required Parameter Validation (arguments are positional, so the message names them without dashes)
+if ([string]::IsNullOrWhiteSpace($source) -or [string]::IsNullOrWhiteSpace($output) -or [string]::IsNullOrWhiteSpace($recognizer)) {
+    Write-Host "Error: Missing required parameters (source, output, recognizer)"
+    exit 1
+}
+
+# Source Path Checks
+if (-not (Test-Path -Path $source)) {
+    Write-Host "Error: Source path does not exist: $source"
+    exit 1
+}
+elseif ((Test-Path -Path $source -PathType Container) -and (@(Get-ChildItem -Path $source).Count -eq 0)) {
+    Write-Host "Error: Source directory is empty: $source"
+    exit 1
+}
+
+# Output Folder Check
+if (-not (Test-Path -Path $output -PathType Container)) {
+    Write-Host "Error: Output folder does not exist: $output"
+    exit 1
+}
+
+# Recognizer Config Mapping
+$recognizer_configs = @{
+    "pw" = "pw.classify.yml"
+    "cgw" = "cgw.classify.yml"
+}
+
+$config_file = $recognizer_configs[$recognizer]
+if ($null -eq $config_file) {
+    Write-Host "Recognizer $recognizer not supported"
+    exit 1
+}
+Write-Host "Using config file: $config_file"
+
+# Paths inside the container, to be mounted
+$embeddings_container = "/mnt/embeddings"
+$output_container = "/mnt/output"
+
+# Build the command as an argument array: PowerShell hands a single string to a native
+# program as ONE argument, so docker would receive the whole command line as the executable name.
+$command = @("python", "/app/src/app.py", "classify", "--source", $embeddings_container, "--output", $output_container, "--config_file", $config_file)
+
+# Convert to absolute paths
+$absolute_source = (Resolve-Path -Path $source).Path
+$absolute_output = (Resolve-Path -Path $output).Path
+
+Write-Host "launching container with command: $command"
+
+# Launch Docker container; each volume spec is one quoted string so it stays a single argument
+docker run --user appuser:appuser --rm `
+    -v "${absolute_source}:${embeddings_container}" `
+    -v "${absolute_output}:${output_container}" $image @command
diff --git a/scripts/classify.sh b/scripts/classify.sh
@@ -41,7 +41,7 @@ recognizer_configs["cgw"]="cgw.classify.yml"
 if [[ -n ${recognizer_configs[$recognizer]} ]]; then
     echo "Using config file: ${recognizer_configs[$recognizer]}"
 else
-    echo "Recognizer $key not supported"
+    echo "Recognizer $recognizer not supported"
     exit 1
 fi
 
@@ -50,18 +50,19 @@ embeddings_container="/mnt/embeddings"
 output_container="/mnt/output"
 output_dir=$output_container/search_results
 
-command="python /app/src/app.py classify --source_folder $embeddings_container --output_folder $output_container  --config_file ${recognizer_configs[$recognizer]}"
-
-#command="python /app/src/app.py --embeddings_dir $embeddings_container --model_path $model_path --output_dir $output_dir --skip_if_file_exists"
-
+command="python /app/src/app.py classify --source $embeddings_container --output $output_container  --config_file ${recognizer_configs[$recognizer]}"
 
 
+# Convert to absolute paths
+absolute_source=$(realpath "$source")
+absolute_output=$(realpath "$output")
 
 echo "launching container with command: $command"
 
 set -x
 docker run --user appuser:appuser --rm \
--v "$(pwd)/src":/app/src \
--v "$source":$embeddings_container \
--v "$output":$output_container $image $command
+-v "$absolute_source":$embeddings_container \
+-v "$absolute_output":$output_container $image $command
 set +x
+
+#-v "$(pwd)/src":/app/src \
diff --git a/scripts/embed.ps1 b/scripts/embed.ps1
@@ -0,0 +1,40 @@
+# Runs `app.py generate` in a Docker container to embed an audio file or a folder of audio.
+# Usage: pwsh embed.ps1 <source> <output_folder> [image]
+$source = $args[0]
+$output = $args[1]
+$image = $args[2]
+if ($null -eq $image) { $image = "qutecoacoustics/perchrunner:latest" }
+
+# Required Parameter Validation
+if ([string]::IsNullOrWhiteSpace($source) -or [string]::IsNullOrWhiteSpace($output)) {
+    Write-Host "Error: Missing required parameters (source, output)"
+    exit 1
+}
+
+# Source may be a file or a folder (app.py handles both), so only require that it exists
+if (-not (Test-Path -Path $source)) {
+    Write-Host "Error: Source path does not exist: $source"
+    exit 1
+}
+if (-not (Test-Path -Path $output -PathType Container)) {
+    Write-Host "Error: Output folder does not exist: $output"
+    exit 1
+}
+
+# Paths to things inside the container, to be mounted
+$source_container = "/mnt/input"
+$output_container = "/mnt/output"
+
+# Resolve before splitting: Split-Path -Parent yields "" for a bare relative name like "audio"
+$resolved_source = (Resolve-Path -Path $source).Path
+$absolute_source = Split-Path -Path $resolved_source -Parent
+$source_basename = Split-Path -Path $resolved_source -Leaf
+$absolute_output = (Resolve-Path -Path $output).Path
+
+# Argument array, not one string: a single string reaches docker as ONE argument
+$command = @("python", "/app/src/app.py", "generate", "--source", "$source_container/$source_basename", "--output", $output_container)
+Write-Host "launching container with command: $command"
+
+# Launch Docker container; each volume spec is one quoted string so it stays a single argument
+& docker run --user appuser:appuser --rm `
+    -v "${absolute_source}:${source_container}" `
+    -v "${absolute_output}:${output_container}" $image @command
diff --git a/scripts/embed.sh b/scripts/embed.sh
@@ -1,7 +1,6 @@
 #! /bin/bash
 
-# launches a docker container interactive with the necessary mounts for running inference on a folder of embeddings
-
+# launches a docker container interactive with the necessary mounts for generating embeddings on a folder of wav files
 # Argument Parsing
 source="$1"
 output="$2"
@@ -22,10 +21,6 @@ if [[ ! -s "$source" ]]; then
     exit 1
 fi
 
-if [[ ! -s "$source" ]]; then
-    echo "Error: Source is empty: $source"
-    exit 1
-fi
 
 # paths to things inside the container, to be mounted
 source_container="/mnt/input"
@@ -34,15 +29,18 @@ output_container="/mnt/output"
 source_folder_host=$(dirname "$source")
 source_basename=$(basename "$source")
 
-command="python /app/src/app.py generate --source_file $source_container/$source_basename --output_folder $output_container"
+command="python /app/src/app.py generate --source $source_container/$source_basename --output $output_container"
 
 echo "launching container with command: $command"
 
+# Convert to absolute paths
+absolute_source=$(realpath "$source_folder_host")
+absolute_output=$(realpath "$output")
+
 set -x
 docker run --user appuser:appuser --rm \
--v "$(pwd)/src":/app/src \
--v "$source_folder_host":$source_container \
--v "$output":$output_container $image $command
+-v "$absolute_source":$source_container \
+-v "$absolute_output":$output_container $image $command
 set +x
 
 # add this in to mount the source directory to run changes without rebuilding

diff --git a/scripts/instructions.md b/scripts/instructions.md
@@ -0,0 +1,28 @@
+# Step 1. Install Docker
+
+Go to https://www.docker.com/get-started/ and install Docker for your computer if you don't already have it installed. The recognizer is provided as a docker container, and you need this software installed to be able to run it. 
+
+# Step 2. Embed audio
+
+1. Open a terminal window
+2. Change directory to this scripts directory
+3. Run the following command:
+  - windows: `pwsh embed.ps1 [path_to_audio_folder] [path_to_embeddings_output_folder]`
+  - linux or intel mac: `./embed.sh [path_to_audio_folder] [path_to_embeddings_output_folder]`
+
+
+Notes
+- In the command above, replace the placeholders with your real audio and output folder. The output folder is where the embeddings files will get saved.
+- This will take quite a long time to run. It's possible that it's too slow to be practical, depending on how much audio you have
+
+# Step 3. Classify embeddings
+
+1. Open a terminal window
+2. Change directory to this scripts directory
+3. Run the following command:
+  - windows: `pwsh classify.ps1 [path_to_embeddings_folder] [path_to_classifications_output_folder] 'pw'`
+  - linux or intel mac: `./classify.sh [path_to_embeddings_folder] [path_to_classifications_output_folder] 'pw'`
+
+
+Notes
+- In the command above, replace the placeholders with your real embeddings folder (the output folder you specified in step 2) and your classifications output folder. The output folder is where the CSV files of classifications will be saved. These files score each 5-second segment. Any score above zero is a positive classification.
diff --git a/src/__init__.py b/src/__init__.py
diff --git a/src/app.py b/src/app.py
@@ -4,12 +4,19 @@
 Entrypoint for processing a single file
 """
 
+from pathlib import Path
+print("------ ----- ----- --- -")
+print(Path.cwd())
+
+for item in Path('src').iterdir():
+    print(item)
+
+
 import argparse
 from src.config import load_config
 from src.embed_audio_slim import embed_file_and_save, embed_folder
 from src.inference_parquet import classify_file_and_save, process_folder
-#import train_linear_model
-#import inference_slim
+
 
 def main():
 
@@ -19,40 +26,46 @@ def main():
 
     parser = argparse.ArgumentParser()
     parser.add_argument("command", choices=list(valid_commands), help=" | ".join(valid_commands))
-    parser.add_argument("--source_file", help="path to the file to analyze")
-    parser.add_argument("--source_folder", help="path to the a folder of files to analyze")
+    parser.add_argument("--source", help="path to the file to analyze")
+    #parser.add_argument("--source_folder", help="path to the a folder of files to analyze")
     parser.add_argument("--config_file", default=None, help="path to the config file")
-    parser.add_argument("--output_folder", help="where to save the result file")
+    parser.add_argument("--output", help="where to save the result file")
     args = parser.parse_args()
 
-    if bool(args.source_file) == bool(args.source_folder):
-        parser.error('You must specify exactly one of --source_file or --source_folder, not both.')
+    # if bool(args.source_file) == bool(args.source_folder):
+    #     parser.error('You must specify exactly one of --source_file or --source_folder, not both.')
+
+    source = Path(args.source)
+    if not source.exists():
+        parser.error(f'source {source} does not exist')
+
+
 
 
     config = load_config(args.config_file)
 
-    if args.source_file:
+    if source.is_file():
 
         if args.command == "generate":
-            embed_file_and_save(args.source_file, args.output_folder, config)
+            embed_file_and_save(source, args.output, config)
         elif args.command == "train":
-            parser.error('Incompatible args. Please specify --source_folder, not --source_file.')
+            parser.error('Incompatible args. Please specify a folder for source.')
         elif args.command == "classify":
-            print(f"classify file {args.source_file} to {args.output_folder}")
-            classify_file_and_save(args.source_file, args.output_folder, config)
+            print(f"classify file {source} to {args.output}")
+            classify_file_and_save(source, args.output, config)
         else:
             print("invalid command")
 
     else:
 
         if args.command == "generate":
-            embed_folder(args.source_folder, args.output_folder, config)
+            embed_folder(source, args.output, config)
         elif args.command == "train":
             # train_linear_model.train(args.source_file, config, args.output_folder)
             print("train: not implemented yet")
         elif args.command == "classify":
-            print(f"classify folder {args.source_folder} to {args.output_folder}")
-            process_folder(args.source_folder, args.output_folder, config)
+            print(f"classify folder {source} to {args.output}")
+            process_folder(source, args.output, config)
         else:
             print("invalid command")
Original file line number	Diff line number	Diff line change
Expand Up		@@ -22,3 +22,4 @@ RUN pip install librosa numpy pytest pytest-mock

		RUN useradd -u 1000 -ms /bin/bash appuser

		ENV PYTHONPATH="/app:$PYTHONPATH"