Installing chirp and training model (#4)

Docker image that installs chirp and scripts to generate embeddings and run linear model over those embeddings
QutEcoacoustics · Mar 18, 2024 · a4dfc9e · a4dfc9e
1 parent e56bec0
commit a4dfc9e
Show file tree

Hide file tree

Showing 72 changed files with 10,874 additions and 1 deletion.
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
@@ -0,0 +1,64 @@
+// For format details, see https://aka.ms/devcontainer.json. For config options, see the README at:
+// https://github.com/microsoft/vscode-dev-containers/tree/v0.245.0/containers/docker-existing-dockerfile
+{
+	"name": "Existing Dockerfile",
+
+	"build": {
+		"dockerfile": "../Dockerfile",
+		// Build arguments handed to the Dockerfile named above.
+		// NOTE(review): ../Dockerfile does not appear to declare 'ARG VERSION';
+		// confirm this value is actually consumed by the build.
+		"args": { "VERSION": "00001122334455_0123abc" }
+	},
+
+	// Use 'forwardPorts' to make a list of ports inside the container available locally.
+	// "forwardPorts": [],
+
+	// Uncomment the next line to run commands after the container is created - for example installing curl.
+	// "postCreateCommand": "apt-get update && apt-get install -y curl",
+
+	// Uncomment to pass extra arguments to 'docker run', e.g. to set an environment variable.
+	// "runArgs": [ "-e BASE_PATH=/"],
+
+	"containerEnv": {},
+
+	// "workspaceMount": "source=${localWorkspaceFolder},target=/app,type=bind",
+    // "workspaceFolder": "/app",
+
+	// Host folders bind-mounted into the container (one Docker --mount string per entry).
+	// Each target may be listed only once: Docker rejects a second mount on the same target path.
+	"mounts": [
+		// "source=${localWorkspaceFolder}/src,target=/app/src,type=bind",
+		// "source=${localWorkspaceFolder}/tests,target=/app/tests,type=bind",
+		"source=/mnt/availae_results,target=/mnt/availae_results,type=bind",
+		"source=/mnt/c/Users/Administrator/Documents/phil,target=/phil,type=bind",
+		"source=/mnt/c/Users/Administrator/Documents/phil/output,target=/output,type=bind"
+	],
+
+	//"workspaceMount": "source=vscode-extensions,target=/vscode/extensions,type=volume",
+
+	// Connect as the non-root 'appuser' account instead of root. See https://aka.ms/vscode-remote/containers/non-root.
+	"remoteUser": "appuser",
+
+	// Tool-specific properties. NOTE(review): the top-level "settings" key that follows this object repeats the VS Code settings below and looks like the legacy location - confirm it is still needed.
+	"customizations": {
+		// VS Code: editor settings and the extension IDs listed for the container.
+		"vscode": {
+			"settings": {
+				"python.testing.pytestArgs": [
+					"tests/app_tests"
+				]
+			},
+			"extensions": [
+				"ms-python.python",
+				"ms-python.pylint"
+			]
+		}
+	},
+	"settings": {
+		"python.testing.pytestArgs": [
+			"tests/app_tests"
+		]
+	}
+
+}
diff --git a/.github/workflows/build.yml b/.github/workflows/build.yml
@@ -0,0 +1,66 @@
+# $schema: https://json.schemastore.org/github-action.json
+name: build
+
+on:  # run on branch pushes and on pull requests
+  push:
+    branches:
+      - "*"  # NOTE(review): '*' does not match branch names containing '/', and with only a branches filter tag pushes do not start this workflow - confirm both are intended
+    paths-ignore:
+      - "**/*.md"  # changes that touch only Markdown files do not trigger a build
+  pull_request:
+    paths-ignore:
+      - "**/*.md"
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: nschloe/action-cached-lfs-checkout@v1
+
+      - uses: docker/setup-buildx-action@v2
+        id: buildx
+        with:
+          install: true
+
+
+      - name: Build image
+        # build.sh is sourced (not executed) so that the PR_VERSION variable it is expected to set stays visible in this shell;
+        # the value is then appended to $GITHUB_ENV so that later steps can read it as env.PR_VERSION
+        run: |
+          source ./build.sh
+          echo "PR_VERSION=$PR_VERSION" >> "$GITHUB_ENV"
+
+      # reading and writing to mounted directories doesn't work unless permissions are given to 'other'
+      - name: permissions for running tests with mounts
+        run: |
+          find ./tests -type d -exec chmod 777 {} +
+          find ./tests -type f -exec chmod 644 {} +
+
+      - name: Test from container
+        run: ./scripts/run_tests.sh
+
+      # set up python so we can run pytest to test invoking the container from the host
+      # - name: Set up Python 3.9
+      #   uses: actions/setup-python@v4
+      #   with:
+      #     python-version: 3.9
+
+      # - name: Test from Host
+      #   run: |
+      #     python -m pip install --upgrade pip
+      #     pip install pytest 
+      #     pytest ./src/tests/end_to_end
+
+      - name: Login to Docker Hub
+        if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')  # publish only from main or from a tag
+        uses: docker/login-action@v2
+        with:
+          username: ${{ secrets.DOCKER_USER }}
+          password: ${{ secrets.DOCKER_PASSWORD }}
+
+      - name: push to docker
+        if: github.ref == 'refs/heads/main' || startsWith(github.ref, 'refs/tags/')  # same guard as the login step
+        run: |  # PR_VERSION was exported through GITHUB_ENV by the build step, so read it as a shell variable instead of templating it into the script
+          docker push "qutecoacoustics/perchrunner:$PR_VERSION"
+          docker push qutecoacoustics/perchrunner:latest
diff --git a/.gitignore b/.gitignore
@@ -0,0 +1,12 @@
+__pycache__
+
+# ignore generated test output, but keep the otherwise empty directory tracked through its .gitkeep
+tests/output/*
+!tests/output/.gitkeep
+
+local_scripts/*
+
+*.ipynb_checkpoints/
+*.ipynb_checkpoints/*
+
+.vscode/launch.local.json
diff --git a/.vscode/settings.json b/.vscode/settings.json
@@ -0,0 +1,7 @@
+{
+    "python.testing.pytestEnabled": true,
+    "python.testing.unittestEnabled": false,
+    "python.testing.pytestArgs": [
+        "tests/app_tests"
+    ]
+}
diff --git a/Dockerfile b/Dockerfile
@@ -0,0 +1,60 @@
+FROM --platform=linux/amd64 python:3.10-bookworm AS perch_runner_dev
+
+# system audio libraries; update + install share one layer so a cached package index can never go stale, and the index is removed again
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg libsndfile1 && rm -rf /var/lib/apt/lists/*
+
+# download and install poetry
+# consider changing to pip install 'poetry==$POETRY_VERSION'
+# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
+# RUN curl -sSL https://install.python-poetry.org | python3 -
+
+RUN mkdir -p /app/src
+WORKDIR /app
+
+
+# We ship our own pyproject file, adapted from the upstream perch one:
+# it adds extra dev dependencies and drops some dependencies of basic
+# perch that we do not need.
+COPY pyproject.toml /app/
+
+# install perch dependencies (not in venv since we are using docker)
+# ENV PATH="/root/.local/bin:$PATH"
+# RUN poetry config virtualenvs.create false --local
+# RUN /root/.local/bin/poetry install
+# # this is due to "connection pool is full" error
+# # https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
+# RUN poetry config installer.max-workers 10
+# RUN poetry install --no-interaction --no-ansi -vvv
+
+# install perch_runner dependencies: perch itself, pinned to an exact commit
+# (--no-cache-dir keeps pip's download cache out of the image layer)
+# RUN pip install git+https://github.com/google-research/perch.git@8cc4468afaac730e77d84ac447f0874f09d10a25
+RUN pip install --no-cache-dir git+https://github.com/google-research/perch.git@3746672d406c6cfe48acb0e725248cea05f57445
+
+# there seems to be a problem with the way flax interacts with jaxlib 
+# this is a temporary fix to see if it helps without introducing other problems
+# RUN pip install --upgrade flax jax jaxlib
+
+#COPY --from=perch_runner_build /app/requirements.txt /app
+
+# RUN pip install .
+# RUN pip install .[dev]
+
+COPY src/ /app/src/
+
+# these are the trained linear models, taken from the build context
+COPY models/ /models/
+
+# the embedding model is placed next to them by download_model.py
+RUN ["python", "/app/src/download_model.py", "--version", "4", "--destination", "/models"]
+
+# ENV PYTHONPATH "${PYTHONPATH}:/app/perch-main"
+
+#COPY ./tests /app/tests
+
+# extra runtime/test packages installed in a single layer; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir librosa numpy pytest pytest-mock
+
+# non-root account with uid 1000, a home directory and bash as login shell
+RUN useradd -u 1000 -ms /bin/bash appuser
+
diff --git a/Dockerfile copy b/Dockerfile copy
@@ -0,0 +1,41 @@
+FROM --platform=linux/amd64 python:3.10-bookworm AS perch_runner
+
+# system audio libraries; update + install share one layer so a cached package index can never go stale, and the index is removed again
+RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg libsndfile1 && rm -rf /var/lib/apt/lists/*
+
+# download and install poetry (consider changing to pip install 'poetry==$POETRY_VERSION',
+# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker)
+# pipefail + curl -f: a failed download must fail the build instead of feeding an empty or error response to python
+RUN bash -o pipefail -c 'curl -fsSL https://install.python-poetry.org | python3 -'
+
+RUN mkdir /app && mkdir /app/src
+WORKDIR /app
+
+# download and unzip Perch; the archive is saved under a fixed name so the step still works when perch_repo is overridden
+ARG perch_repo=https://github.com/google-research/chirp/archive/refs/heads/main.zip
+RUN wget -O perch.zip "$perch_repo" && unzip perch.zip && rm perch.zip
+# WORKDIR /app/perch-main
+WORKDIR /app
+
+# We ship our own pyproject file, adapted from the upstream perch one:
+# it adds extra dev dependencies and drops some dependencies of basic
+# perch that we do not need.
+COPY pyproject.toml /app/
+
+# install perch dependencies (not in venv since we are using docker)
+ENV PATH="/root/.local/bin:$PATH"
+# RUN /root/.local/bin/poetry install
+# installer.max-workers is set because of the "connection pool is full" error, see
+# https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
+RUN poetry config virtualenvs.create false --local \
+    && poetry config installer.max-workers 10
+RUN poetry install --no-interaction --no-ansi -vvv
+
+# install perch_runner sources
+COPY ./src /app/src
+
+# the embedding model is placed in /models by download_model.py
+RUN python /app/src/download_model.py --version 4 --destination /models
+# key=value form replaces the deprecated 'ENV key value' syntax. NOTE(review): if PYTHONPATH is unset here the value starts with ':' - confirm the empty entry is intended
+ENV PYTHONPATH="${PYTHONPATH}:/app/perch-main"
+
diff --git a/Dockerfile_alpine b/Dockerfile_alpine
@@ -0,0 +1,60 @@
+FROM --platform=linux/amd64 python:3.10-alpine AS perch_runner_dev
+
+# Install system dependencies (--no-cache fetches a fresh package index itself, so no separate 'apk update' is needed)
+RUN apk add --no-cache ffmpeg git libsndfile
+
+# download and install poetry
+# consider changing to pip install 'poetry==$POETRY_VERSION'
+# see https://stackoverflow.com/questions/53835198/integrating-python-poetry-with-docker
+# RUN curl -sSL https://install.python-poetry.org | python3 -
+
+RUN mkdir -p /app/src
+WORKDIR /app
+
+
+# We ship our own pyproject file, adapted from the upstream perch one:
+# it adds extra dev dependencies and drops some dependencies of basic
+# perch that we do not need.
+COPY pyproject.toml /app/
+
+# install perch dependencies (not in venv since we are using docker)
+# ENV PATH="/root/.local/bin:$PATH"
+# RUN poetry config virtualenvs.create false --local
+# RUN /root/.local/bin/poetry install
+# # this is due to "connection pool is full" error
+# # https://stackoverflow.com/questions/74385209/poetry-install-throws-connection-pool-is-full-discarding-connection-pypi-org
+# RUN poetry config installer.max-workers 10
+# RUN poetry install --no-interaction --no-ansi -vvv
+
+# install perch_runner dependencies: perch itself, pinned to an exact commit
+# (--no-cache-dir keeps pip's download cache out of the image layer)
+# RUN pip install git+https://github.com/google-research/perch.git@8cc4468afaac730e77d84ac447f0874f09d10a25
+RUN pip install --no-cache-dir git+https://github.com/google-research/perch.git@3746672d406c6cfe48acb0e725248cea05f57445
+
+# there seems to be a problem with the way flax interacts with jaxlib 
+# this is a temporary fix to see if it helps without introducing other problems
+# RUN pip install --upgrade flax jax jaxlib
+
+#COPY --from=perch_runner_build /app/requirements.txt /app
+
+# RUN pip install .
+# RUN pip install .[dev]
+
+COPY src/ /app/src/
+
+# these are the trained linear models, taken from the build context
+COPY models/ /models/
+
+# the embedding model is placed next to them by download_model.py
+RUN ["python", "/app/src/download_model.py", "--version", "4", "--destination", "/models"]
+
+# ENV PYTHONPATH "${PYTHONPATH}:/app/perch-main"
+
+#COPY ./tests /app/tests
+
+# extra runtime/test packages installed in a single layer; --no-cache-dir keeps pip's download cache out of the image
+RUN pip install --no-cache-dir librosa numpy pytest pytest-mock
+
+# this image installs neither useradd (shadow) nor bash, so create the uid-1000 account with BusyBox adduser and /bin/sh
+RUN adduser -D -u 1000 -s /bin/sh appuser
+
diff --git a/Docs.md b/Docs.md
@@ -0,0 +1,25 @@
+This document contains instructions for running one of the following recognizers over unlabelled audio:
+- pw (Plains Wanderer)
+- cgw (Carpentarian Grass Wren)
+
+# Setup
+
+1. Prepare the paths to the relevant folders 
+
+  You will need the following paths on your computer
+  1. A folder with audio recordings to analyse (input files)
+  2. A writable folder where we can store temporary files
+  3. A writable folder where we can save the results for each input file
+
+2. Install docker if you don't have it.
+
+You will need **Docker** installed on your computer
+
+To install Docker, please see `https://docs.docker.com/engine/install/`. Docker is software that runs *containers*. The recognizer (i.e. the trained model and the scripts needed to run it) and its dependencies are all bundled into one such container.
+
+
+# Running the recognizer
+
+Then open a terminal and run the following command:
+
+`./scripts/run_inference.sh`