Skip to content

Commit

Permalink
working on scripts for launching container
Browse files Browse the repository at this point in the history
  • Loading branch information
peichins committed Mar 20, 2024
1 parent 211c97e commit 1954f6f
Show file tree
Hide file tree
Showing 14 changed files with 235 additions and 86 deletions.
9 changes: 7 additions & 2 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,12 +1,17 @@
__pycache__

tests/output/*

!test/output/.gitkeep

tests/input/*
!test/input/.gitkeep

local_scripts/*

*.ipynb_checkpoints/
*.ipynb_checkpoints/*

.vscode/launch.local.json
.vscode/launch.local.json

temp
temp/*
1 change: 1 addition & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
Expand Up @@ -22,3 +22,4 @@ RUN pip install librosa numpy pytest pytest-mock

RUN useradd -u 1000 -ms /bin/bash appuser

ENV PYTHONPATH="/app:$PYTHONPATH"
41 changes: 0 additions & 41 deletions pyproject_bak.toml

This file was deleted.

61 changes: 61 additions & 0 deletions scripts/classify.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
# Classifies embeddings by running `app.py classify` inside the perchrunner
# Docker container, with the source and output paths bind-mounted into it.
#
# Usage: pwsh classify.ps1 <source> <output> <recognizer> [image]
#   source     - existing path; if it is a folder it must not be empty
#   output     - existing folder that receives the results
#   recognizer - key of $recognizer_configs below ("pw" or "cgw")
#   image      - optional Docker image (defaults to qutecoacoustics/perchrunner:latest)
#
# Exits with 1 on a validation failure, otherwise with docker's exit code.

# Argument Parsing
$source = $args[0]
$output = $args[1]
$recognizer = $args[2]
$image = $args[3]
if ([string]::IsNullOrWhiteSpace($image)) { $image = "qutecoacoustics/perchrunner:latest" }

# Required Parameter Validation (arguments are positional, not named flags)
if ([string]::IsNullOrWhiteSpace($source) -or [string]::IsNullOrWhiteSpace($output) -or [string]::IsNullOrWhiteSpace($recognizer)) {
    Write-Host "Error: Missing required parameters (source, output, recognizer)"
    exit 1
}

# Source Path Checks
if (-not (Test-Path -LiteralPath $source)) {
    Write-Host "Error: Source path does not exist: $source"
    exit 1
}
elseif ((Test-Path -LiteralPath $source -PathType Container) -and (@(Get-ChildItem -LiteralPath $source).Count -eq 0)) {
    Write-Host "Error: Source directory is empty: $source"
    exit 1
}

# Output Folder Check
if (-not (Test-Path -LiteralPath $output -PathType Container)) {
    Write-Host "Error: Output folder does not exist: $output"
    exit 1
}

# Recognizer Config Mapping
$recognizer_configs = @{
    "pw"  = "pw.classify.yml"
    "cgw" = "cgw.classify.yml"
}

$config_file = $recognizer_configs[$recognizer]
if ($null -eq $config_file) {
    Write-Host "Recognizer $recognizer not supported"
    exit 1
}
else {
    Write-Host "Using config file: $config_file"
}

# Paths inside the container, to be mounted
$embeddings_container = "/mnt/embeddings"
$output_container = "/mnt/output"

# The container command is kept as an array: PowerShell passes a string
# variable to a native program as ONE argument, so a single command string
# would reach docker as one (non-existent) executable name.
$command_args = @(
    "python", "/app/src/app.py", "classify",
    "--source", $embeddings_container,
    "--output", $output_container,
    "--config_file", $config_file
)
$command = $command_args -join " "

# Convert to absolute paths (docker bind mounts require absolute host paths)
$absolute_source = (Resolve-Path -LiteralPath $source).ProviderPath
$absolute_output = (Resolve-Path -LiteralPath $output).ProviderPath

Write-Host "launching container with command: $command"

# Launch Docker container.
# Each mount spec is built as one string; ${} is required because "$name:" would
# otherwise be parsed as a scope/drive qualifier.
$docker_args = @(
    "run", "--user", "appuser:appuser", "--rm",
    "-v", "${absolute_source}:${embeddings_container}",
    "-v", "${absolute_output}:${output_container}",
    $image
) + $command_args

& docker @docker_args

# Propagate the container's exit status to the caller
exit $LASTEXITCODE
17 changes: 9 additions & 8 deletions scripts/classify.sh
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ recognizer_configs["cgw"]="cgw.classify.yml"
if [[ -n ${recognizer_configs[$recognizer]} ]]; then
echo "Using config file: ${recognizer_configs[$recognizer]}"
else
echo "Recognizer $key not supported"
echo "Recognizer $recognizer not supported"
exit 1
fi

Expand All @@ -50,18 +50,19 @@ embeddings_container="/mnt/embeddings"
output_container="/mnt/output"
output_dir=$output_container/search_results

command="python /app/src/app.py classify --source_folder $embeddings_container --output_folder $output_container --config_file ${recognizer_configs[$recognizer]}"

#command="python /app/src/app.py --embeddings_dir $embeddings_container --model_path $model_path --output_dir $output_dir --skip_if_file_exists"

command="python /app/src/app.py classify --source $embeddings_container --output $output_container --config_file ${recognizer_configs[$recognizer]}"


# Convert to absolute paths
absolute_source=$(realpath "$source")
absolute_output=$(realpath "$output")

echo "launching container with command: $command"

set -x
docker run --user appuser:appuser --rm \
-v "$(pwd)/src":/app/src \
-v "$source":$embeddings_container \
-v "$output":$output_container $image $command
-v "$absolute_source":$embeddings_container \
-v "$absolute_output":$output_container $image $command
set +x

#-v "$(pwd)/src":/app/src \
40 changes: 40 additions & 0 deletions scripts/embed.ps1
Original file line number Diff line number Diff line change
@@ -0,0 +1,40 @@
# Generates embeddings by running `app.py generate` inside the perchrunner
# Docker container. The parent folder of the source is bind-mounted into the
# container and the source is addressed by its name inside that mount.
#
# Usage: pwsh embed.ps1 <source> <output> [image]
#   source - existing audio file or folder
#   output - existing folder that receives the embeddings
#   image  - optional Docker image (defaults to qutecoacoustics/perchrunner:latest)
#
# Exits with 1 on a validation failure, otherwise with docker's exit code.

# Argument Parsing
$source = $args[0]
$output = $args[1]
$image = $args[2]
if ([string]::IsNullOrWhiteSpace($image)) { $image = "qutecoacoustics/perchrunner:latest" }

# Required Parameter Validation
if ([string]::IsNullOrWhiteSpace($source) -or [string]::IsNullOrWhiteSpace($output)) {
    Write-Host "Error: Missing required parameters (source, output)"
    exit 1
}

# Diagnostic output: working directory and the source as given
Write-Host (Get-Location)
Write-Host $source

# Source Path Checks (a file or a folder is accepted; the previous Leaf-only
# test rejected folders even though the message talked about a folder)
if (-not (Test-Path -LiteralPath $source)) {
    Write-Host "Error: Source path does not exist: $source"
    exit 1
}

# Output Folder Check
if (-not (Test-Path -LiteralPath $output -PathType Container)) {
    Write-Host "Error: Output folder does not exist: $output"
    exit 1
}

# Paths to things inside the container, to be mounted
$source_container = "/mnt/input"
$output_container = "/mnt/output"

# Resolve to an absolute path BEFORE splitting: Split-Path -Parent on a bare
# relative name such as "audio.wav" yields an empty string, which cannot be
# resolved or mounted.
$resolved_source = (Resolve-Path -LiteralPath $source).ProviderPath
$absolute_source = Split-Path -Path $resolved_source -Parent
$source_basename = Split-Path -Path $resolved_source -Leaf
$absolute_output = (Resolve-Path -LiteralPath $output).ProviderPath

# The container command is kept as an array: PowerShell passes a string
# variable to a native program as ONE argument, so a single command string
# would reach docker as one (non-existent) executable name.
$command_args = @(
    "python", "/app/src/app.py", "generate",
    "--source", "$source_container/$source_basename",
    "--output", $output_container
)
$command = $command_args -join " "

Write-Host "launching container with command: $command"

# Launch Docker container.
# Each mount spec is built as one string; ${} is required because "$name:" would
# otherwise be parsed as a scope/drive qualifier.
$docker_args = @(
    "run", "--user", "appuser:appuser", "--rm",
    "-v", "${absolute_source}:${source_container}",
    "-v", "${absolute_output}:${output_container}",
    $image
) + $command_args

& docker @docker_args

# Propagate the container's exit status to the caller
exit $LASTEXITCODE
18 changes: 8 additions & 10 deletions scripts/embed.sh
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#! /bin/bash

# launches a docker container interactive with the necessary mounts for running inference on a folder of embeddings

# launches a docker container interactive with the necessary mounts for generating embeddings on a folder of wav files
# Argument Parsing
source="$1"
output="$2"
Expand All @@ -22,10 +21,6 @@ if [[ ! -s "$source" ]]; then
exit 1
fi

if [[ ! -s "$source" ]]; then
echo "Error: Source is empty: $source"
exit 1
fi

# paths to things inside the container, to be mounted
source_container="/mnt/input"
Expand All @@ -34,15 +29,18 @@ output_container="/mnt/output"
source_folder_host=$(dirname "$source")
source_basename=$(basename "$source")

command="python /app/src/app.py generate --source_file $source_container/$source_basename --output_folder $output_container"
command="python /app/src/app.py generate --source $source_container/$source_basename --output $output_container"

echo "launching container with command: $command"

# Convert to absolute paths
absolute_source=$(realpath "$source_folder_host")
absolute_output=$(realpath "$output")

set -x
docker run --user appuser:appuser --rm \
-v "$(pwd)/src":/app/src \
-v "$source_folder_host":$source_container \
-v "$output":$output_container $image $command
-v "$absolute_source":$source_container \
-v "$absolute_output":$output_container $image $command
set +x

# add this in to mount the source directory to run changes without rebuilding
Expand Down
28 changes: 28 additions & 0 deletions scripts/instructions.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# Step 1. Install Docker

Go to https://www.docker.com/get-started/ and install Docker for your computer if you don't already have it installed. The recognizer is provided as a docker container, and you need this software installed to be able to run it.

# Step 2. Embed audio

1. Open a terminal window
2. Change directory to this scripts directory
3. Run the following command:
- windows: `pwsh embed.ps1 [path_to_audio_folder] [path_to_embeddings_output_folder]`
- linux or intel mac: `./embed.sh [path_to_audio_folder] [path_to_embeddings_output_folder]`


Notes
- In the command above, replace the placeholders with your real audio and output folder. The output folder is where the embeddings files will get saved.
- This will take quite a long time to run. Depending on how much audio you have, it may be too slow to be practical.

# Step 3. Classify embeddings

1. Open a terminal window
2. Change directory to this scripts directory
3. Run the following command:
- windows: `pwsh classify.ps1 [path_to_embeddings_folder] [path_to_classifications_output_folder] 'pw'`
- linux or intel mac: `./classify.sh [path_to_embeddings_folder] [path_to_classifications_output_folder] 'pw'`


Notes
- In the command above, replace the placeholders with your real embeddings folder (the output folder you specified in Step 2) and your classifications output folder. The output folder is where the CSV files of classifications will be saved. These files score each 5 second segment. Any score above zero is a positive classification.
Empty file added src/__init__.py
Empty file.
43 changes: 28 additions & 15 deletions src/app.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,19 @@
Entrypoint for processing a single file
"""

from pathlib import Path
print("------ ----- ----- --- -")
print(Path.cwd())

for item in Path('src').iterdir():
print(item)


import argparse
from src.config import load_config
from src.embed_audio_slim import embed_file_and_save, embed_folder
from src.inference_parquet import classify_file_and_save, process_folder
#import train_linear_model
#import inference_slim


def main():

Expand All @@ -19,40 +26,46 @@ def main():

parser = argparse.ArgumentParser()
parser.add_argument("command", choices=list(valid_commands), help=" | ".join(valid_commands))
parser.add_argument("--source_file", help="path to the file to analyze")
parser.add_argument("--source_folder", help="path to the a folder of files to analyze")
parser.add_argument("--source", help="path to the file to analyze")
#parser.add_argument("--source_folder", help="path to the a folder of files to analyze")
parser.add_argument("--config_file", default=None, help="path to the config file")
parser.add_argument("--output_folder", help="where to save the result file")
parser.add_argument("--output", help="where to save the result file")
args = parser.parse_args()

if bool(args.source_file) == bool(args.source_folder):
parser.error('You must specify exactly one of --source_file or --source_folder, not both.')
# if bool(args.source_file) == bool(args.source_folder):
# parser.error('You must specify exactly one of --source_file or --source_folder, not both.')

source = Path(args.source)
if not source.exists():
parser.error(f'source {source} does not exist')




config = load_config(args.config_file)

if args.source_file:
if source.is_file():

if args.command == "generate":
embed_file_and_save(args.source_file, args.output_folder, config)
embed_file_and_save(source, args.output, config)
elif args.command == "train":
parser.error('Incompatible args. Please specify --source_folder, not --source_file.')
parser.error('Incompatible args. Please specify a folder for source.')
elif args.command == "classify":
print(f"classify file {args.source_file} to {args.output_folder}")
classify_file_and_save(args.source_file, args.output_folder, config)
print(f"classify file {source} to {args.output}")
classify_file_and_save(source, args.output, config)
else:
print("invalid command")

else:

if args.command == "generate":
embed_folder(args.source_folder, args.output_folder, config)
embed_folder(source, args.output, config)
elif args.command == "train":
# train_linear_model.train(args.source_file, config, args.output_folder)
print("train: not implemented yet")
elif args.command == "classify":
print(f"classify folder {args.source_folder} to {args.output_folder}")
process_folder(args.source_folder, args.output_folder, config)
print(f"classify folder {source} to {args.output}")
process_folder(source, args.output, config)
else:
print("invalid command")

Expand Down
Loading

0 comments on commit 1954f6f

Please sign in to comment.