Skip to content

Commit

Permalink
Installing chirp and training model (#4)
Browse files Browse the repository at this point in the history
Docker image that installs chirp and scripts to generate embeddings and run linear model over those embeddings
  • Loading branch information
peichins authored Mar 18, 2024
1 parent e56bec0 commit a4dfc9e
Show file tree
Hide file tree
Showing 72 changed files with 10,874 additions and 1 deletion.
64 changes: 64 additions & 0 deletions .devcontainer/devcontainer.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,64 @@
// VS Code dev container definition: builds the image from the repository's Dockerfile
// and attaches to the container as the non-root "appuser" account.
// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/docker-existing-dockerfile
{
    "name": "Existing Dockerfile",

    "build": {
        "dockerfile": "../Dockerfile",
        // Build argument handed to `docker build`.
        // NOTE(review): confirm the Dockerfile actually declares `ARG VERSION`; otherwise this value is unused.
        "args": { "VERSION": "00001122334455_0123abc" }
    },

    // Use 'forwardPorts' to make a list of ports inside the container available locally.
    // "forwardPorts": [],

    // Uncomment the next line to run commands after the container is created - for example installing curl.
    // "postCreateCommand": "apt-get update && apt-get install -y curl",

    // Extra `docker run` arguments (currently disabled).
    // "runArgs": [ "-e BASE_PATH=/"],

    "containerEnv": {},

    // "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind",
    // "workspaceFolder": "/app",

    // Host directories bind-mounted into the container.
    // These are absolute paths on one specific machine; adjust them for your own host.
    // Each target may appear only once: Docker refuses to start a container with a duplicate mount point.
    "mounts": [
        //"source=${localWorkspaceFolder}/src,target=/app/src,type=bind",
        // "source=${localWorkspaceFolder}/tests,target=/app/tests,type=bind",
        "source=/mnt/availae_results,target=/mnt/availae_results,type=bind",
        "source=/mnt/c/Users/Administrator/Documents/phil,target=/phil,type=bind",
        "source=/mnt/c/Users/Administrator/Documents/phil/output,target=/output,type=bind"
    ],

    //"workspaceMount": "source=vscode-extensions,target=/vscode/extensions,type=volume",

    // Connect as the non-root user. See https://aka.ms/vscode-remote/containers/non-root.
    "remoteUser": "appuser",

    // Configure tool-specific properties.
    "customizations": {
        // Configure properties specific to VS Code.
        "vscode": {
            "settings": {
                "python.testing.pytestArgs": [
                    "tests/app_tests"
                ]
            },
            "extensions": [
                "ms-python.python",
                "ms-python.pylint"
            ]
        }
    }
}
66 changes: 66 additions & 0 deletions .github/workflows/build.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,66 @@
# $schema: https://json.schemastore.org/github-action.json
# CI pipeline: builds the Docker image, runs the tests inside the container and,
# for the main branch, pushes the image to Docker Hub.
name: build

on:
  push:
    branches:
      # "**" also matches branch names that contain "/" (e.g. feature/foo);
      # a single "*" does not, so such branches were never built on push.
      - "**"
    paths-ignore:
      - "**/*.md"
  pull_request:
    paths-ignore:
      - "**/*.md"

jobs:
  build:
    runs-on: ubuntu-latest

    steps:
      - uses: nschloe/action-cached-lfs-checkout@v1

      - uses: docker/setup-buildx-action@v2
        id: buildx
        with:
          install: true

      - name: Build image
        # build.sh is sourced (not executed) so that the PR_VERSION variable it is
        # expected to set is visible in this shell; the value is then written to
        # $GITHUB_ENV for use in subsequent steps
        run: |
          source ./build.sh
          echo "PR_VERSION=$PR_VERSION" >> "$GITHUB_ENV"

      # reading and writing to mounted directories doesn't work unless permissions are given to 'other'
      - name: permissions for running tests with mounts
        run: |
          find ./tests -type d -exec chmod 777 {} +
          find ./tests -type f -exec chmod 644 {} +

      - name: Test from container
        run: ./scripts/run_tests.sh

      # set up python so we can run pytest to test invoking the container from the host
      # - name: Set up Python 3.9
      #   uses: actions/setup-python@v4
      #   with:
      #     python-version: 3.9

      # - name: Test from Host
      #   run: |
      #     python -m pip install --upgrade pip
      #     pip install pytest
      #     pytest ./src/tests/end_to_end

      # NOTE(review): the conditions below also accept tag refs, but `on.push` only
      # declares a `branches` filter, so tag pushes do not trigger this workflow.
      # Add a `tags` filter to `on.push` if publishing on tags is wanted.
      - name: Login to Docker Hub
        if: (github.ref == 'refs/heads/main' || contains(github.ref, 'refs/tag'))
        uses: docker/login-action@v2
        with:
          username: ${{ secrets.DOCKER_USER }}
          password: ${{ secrets.DOCKER_PASSWORD }}

      - name: push to docker
        if: (github.ref == 'refs/heads/main' || contains(github.ref, 'refs/tag'))
        run: |
          docker push qutecoacoustics/perchrunner:${{ env.PR_VERSION }}
          docker push qutecoacoustics/perchrunner:latest
12 changes: 12 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
# Python bytecode caches
__pycache__

# Ignore everything written to tests/output, but keep the placeholder file
# so the (otherwise empty) directory stays in the repository.
# The negation must use the same path as the ignore rule above it.
tests/output/*
!tests/output/.gitkeep

# Contents of the local scripts directory
local_scripts/*

# Jupyter checkpoint directories
*.ipynb_checkpoints/
*.ipynb_checkpoints/*

# Local VS Code launch configuration
.vscode/launch.local.json
7 changes: 7 additions & 0 deletions .vscode/settings.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
{
    "python.testing.pytestEnabled": true,
    "python.testing.unittestEnabled": false,
    "python.testing.pytestArgs": ["tests/app_tests"]
}
60 changes: 60 additions & 0 deletions Dockerfile
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Image for perch_runner: Python 3.10 on Debian bookworm with the Perch package,
# the trained linear models and the downloaded embedding model.
FROM --platform=linux/amd64 python:3.10-bookworm AS perch_runner_dev

# Audio libraries. `update` and `install` share one layer so that a cached, stale
# package index is never reused; the index is deleted in the same layer so it does
# not end up in the image.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# download and install poetry
# consider changing to pip install 'poetry==$POETRY_VERSION'
# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
# RUN curl -sSL https://install.python-poetry.org | python3 -

# WORKDIR creates /app when it is missing; /app/src is created by the COPY further down
WORKDIR /app

# we use our own pyproject file, modified from the perch one,
# because we have extra dev dependencies, and also we remove some unnecessary
# deps from the basic perch
COPY ./pyproject.toml /app/

# install perch dependencies (not in venv since we are using docker)
# ENV PATH="/root/.local/bin:$PATH"
# RUN poetry config virtualenvs.create false --local
# RUN /root/.local/bin/poetry install
# # this is due to "connection pool is full" error
# # https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
# RUN poetry config installer.max-workers 10
# RUN poetry install --no-interaction --no-ansi -vvv

# install perch_runner dependencies
# (--no-cache-dir keeps pip's download cache out of the image layers)

# RUN pip install git+https://github.com/google-research/perch.git@8cc4468afaac730e77d84ac447f0874f09d10a25
RUN pip install --no-cache-dir git+https://github.com/google-research/perch.git@3746672d406c6cfe48acb0e725248cea05f57445

# there seems to be a problem with the way flax interacts with jaxlib
# this is a temporary fix to see if it helps without introducing other problems
# RUN pip install --upgrade flax jax jaxlib

# Additional runtime and test packages. Installed before the source is copied so
# that editing ./src does not invalidate this layer and force a reinstall.
RUN pip install --no-cache-dir \
        librosa \
        numpy \
        pytest \
        pytest-mock

#COPY --from=perch_runner_build /app/requirements.txt /app

# RUN pip install .
# RUN pip install .[dev]

COPY ./src /app/src

# these are the trained linear models
COPY ./models /models

# this is the embedding model
RUN python /app/src/download_model.py --version 4 --destination /models

# ENV PYTHONPATH "${PYTHONPATH}:/app/perch-main"

#COPY ./tests /app/tests

# Non-root account with a fixed UID.
# NOTE(review): there is no USER instruction, so containers still start as root
# unless the caller selects this account — confirm that is intended.
RUN useradd -u 1000 -ms /bin/bash appuser

41 changes: 41 additions & 0 deletions Dockerfile copy
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
# Poetry-based variant of the perch_runner image: downloads the Perch source archive
# and installs its dependencies with poetry from our own pyproject.toml.
FROM --platform=linux/amd64 python:3.10-bookworm AS perch_runner

# Make a RUN step fail when any command of a pipeline fails; with the default
# /bin/sh a failed download piped into an interpreter would go unnoticed.
SHELL ["/bin/bash", "-o", "pipefail", "-c"]

# Audio libraries. `update` and `install` share one layer so that a cached, stale
# package index is never reused; the index is deleted in the same layer.
RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ffmpeg \
        libsndfile1 \
    && rm -rf /var/lib/apt/lists/*

# download and install poetry
# consider changing to pip install 'poetry==$POETRY_VERSION'
# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
# (-f makes curl exit non-zero on an HTTP error instead of piping the error page to python)
RUN curl -fsSL https://install.python-poetry.org | python3 -

# WORKDIR creates /app when it is missing; /app/src is created by the COPY further down
WORKDIR /app

# download and unzip Perch
ARG perch_repo=https://github.com/google-research/chirp/archive/refs/heads/main.zip
RUN wget "$perch_repo" && unzip main.zip && rm main.zip
# WORKDIR /app/perch-main

# we use our own pyproject file, modified from the perch one,
# because we have extra dev dependencies, and also we remove some unnecessary
# deps from the basic perch
COPY ./pyproject.toml /app/

# install perch dependencies (not in venv since we are using docker)
ENV PATH="/root/.local/bin:$PATH"
RUN poetry config virtualenvs.create false --local
# RUN /root/.local/bin/poetry install
# this is due to "connection pool is full" error
# https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
RUN poetry config installer.max-workers 10
RUN poetry install --no-interaction --no-ansi -vvv

# install perch_runner dependencies

COPY ./src /app/src

# this is the embedding model
RUN python /app/src/download_model.py --version 4 --destination /models

# Make the unpacked Perch sources importable.
# NOTE(review): presumably the archive unpacks to /app/perch-main — confirm the directory name.
ENV PYTHONPATH="${PYTHONPATH}:/app/perch-main"

60 changes: 60 additions & 0 deletions Dockerfile_alpine
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Alpine variant of the perch_runner image.
# NOTE(review): Alpine uses musl libc; confirm that the Perch dependencies can be
# installed on it before relying on this image.
FROM --platform=linux/amd64 python:3.10-alpine AS perch_runner_dev

# Install system dependencies.
# --no-cache fetches a fresh index and leaves none behind, so no separate `apk update` is needed.
# bash is installed because it is the login shell given to appuser at the end of this file.
RUN apk add --no-cache \
        bash \
        ffmpeg \
        git \
        libsndfile

# download and install poetry
# consider changing to pip install 'poetry==$POETRY_VERSION'
# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
# RUN curl -sSL https://install.python-poetry.org | python3 -

# WORKDIR creates /app when it is missing; /app/src is created by the COPY further down
WORKDIR /app

# we use our own pyproject file, modified from the perch one,
# because we have extra dev dependencies, and also we remove some unnecessary
# deps from the basic perch
COPY ./pyproject.toml /app/

# install perch dependencies (not in venv since we are using docker)
# ENV PATH="/root/.local/bin:$PATH"
# RUN poetry config virtualenvs.create false --local
# RUN /root/.local/bin/poetry install
# # this is due to "connection pool is full" error
# # https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
# RUN poetry config installer.max-workers 10
# RUN poetry install --no-interaction --no-ansi -vvv

# install perch_runner dependencies
# (--no-cache-dir keeps pip's download cache out of the image layers)

# RUN pip install git+https://github.com/google-research/perch.git@8cc4468afaac730e77d84ac447f0874f09d10a25
RUN pip install --no-cache-dir git+https://github.com/google-research/perch.git@3746672d406c6cfe48acb0e725248cea05f57445

# there seems to be a problem with the way flax interacts with jaxlib
# this is a temporary fix to see if it helps without introducing other problems
# RUN pip install --upgrade flax jax jaxlib

# Additional runtime and test packages. Installed before the source is copied so
# that editing ./src does not invalidate this layer and force a reinstall.
RUN pip install --no-cache-dir \
        librosa \
        numpy \
        pytest \
        pytest-mock

#COPY --from=perch_runner_build /app/requirements.txt /app

# RUN pip install .
# RUN pip install .[dev]

COPY ./src /app/src

# these are the trained linear models
COPY ./models /models

# this is the embedding model
RUN python /app/src/download_model.py --version 4 --destination /models

# ENV PYTHONPATH "${PYTHONPATH}:/app/perch-main"

#COPY ./tests /app/tests

# Non-root account with a fixed UID. Alpine ships BusyBox `adduser`, not `useradd`:
# -D creates the account without a password, -s sets the login shell.
# NOTE(review): there is no USER instruction, so containers still start as root
# unless the caller selects this account — confirm that is intended.
RUN adduser -D -u 1000 -s /bin/bash appuser

25 changes: 25 additions & 0 deletions Docs.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,25 @@
This document contains instructions for running one of the following recognizers over unlabelled audio:
- pw (Plains Wanderer)
- cgw (Carpentarian Grass Wren)

# Setup

1. Prepare the paths to the relevant folders

You will need the following paths on your computer
1. A folder with audio recordings to analyse (input files)
2. A writable folder where we can store temporary files
3. A writable folder where we can save the results for each input file

2. Install docker if you don't have it.

You will need **Docker** installed on your computer

To install docker, please see `https://docs.docker.com/engine/install/`. Docker is software that can run a docker *container*. The recognizer (i.e. the trained model and the scripts needed to run it) and its dependencies are all bundled into this container.


# Running the recognizer

Open a terminal and enter the following command:

./Scripts/run_inference.sh
Loading

0 comments on commit a4dfc9e

Please sign in to comment.