# NOTE(review): this patch was recovered from a copy whose newlines and
# indentation had been flattened. Line breaks below are placed so that every
# hunk matches the old/new line counts in its @@ header; the indentation of
# context lines is inferred from YAML/Python structure and must be confirmed
# against the repository before applying (regenerate with `git diff` if a
# hunk is rejected).
# NOTE(review): pull_request_tests.yml keeps actions/setup-python@v2 while
# test_cli.yml moves to @v5 in this same patch.
# NOTE(review): the createbuckets command ends with `exit 0;`, so
# `service_completed_successfully` is met even when an `mc` step fails.
# NOTE(review): `condition: service_healthy` presumably relies on a
# healthcheck on the minio service, which this patch does not show - confirm.
diff --git a/.github/workflows/pull_request_tests.yml b/.github/workflows/pull_request_tests.yml
index 7f4cb41..92893a4 100644
--- a/.github/workflows/pull_request_tests.yml
+++ b/.github/workflows/pull_request_tests.yml
@@ -33,6 +33,10 @@ jobs:
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.ref }}
 
+      - name: Clear runner space
+        run: |
+          rm -rf /opt/hostedtoolcache
+
       - name: Set up Python ${{ matrix.python-version }}
         uses: actions/setup-python@v2
         with:
@@ -67,7 +71,7 @@ jobs:
         run: |
           mkdir -p ~/MLOps_data/mlflow_minio
           mkdir -p ~/MLOps_data/mlflow_db
-          docker compose -f mlflow_server/docker-compose.yml up -d --build
+          docker compose -f mlflow_server/docker-compose.yml up -d --build --wait
 
       - name: Setup flake8 annotations
         uses: rbialon/flake8-annotations@v1
diff --git a/.github/workflows/test_cli.yml b/.github/workflows/test_cli.yml
index 966c0cd..ea9ed41 100644
--- a/.github/workflows/test_cli.yml
+++ b/.github/workflows/test_cli.yml
@@ -33,26 +33,29 @@ jobs:
           fetch-depth: 0
           ref: ${{ github.event.pull_request.head.ref }}
 
-      - name: Set up Python ${{ matrix.python-version }}
-        uses: actions/setup-python@v2
-        with:
-          python-version: ${{ matrix.python-version }}
-
       - name: Clear runner space
         run: |
           rm -rf /opt/hostedtoolcache
 
+      - name: Set up Python ${{ matrix.python-version }}
+        uses: actions/setup-python@v5
+        with:
+          python-version: ${{ matrix.python-version }}
+
       - name: Install csc-mlops
         run: |
           python -m pip install --upgrade pip build
           python -m build
-          pip install dist/*.whl
+          python -m pip install dist/*.whl
 
       - name: start MLOps server
+        # --wait blocks until all services are healthy or completed before returning.
+        # createbuckets depends on minio being healthy, so the mlflow bucket is guaranteed
+        # to exist before the test runs.
         run: |
           mkdir -p ~/MLOps_data/mlflow_minio
           mkdir -p ~/MLOps_data/mlflow_db
-          docker compose -f mlflow_server/docker-compose.yml up -d --build
+          docker compose -f mlflow_server/docker-compose.yml up -d --build --wait
 
       - name: Test run CLI
         run: |
diff --git a/mlflow_server/docker-compose.yml b/mlflow_server/docker-compose.yml
index 0a8ad38..f3400f0 100644
--- a/mlflow_server/docker-compose.yml
+++ b/mlflow_server/docker-compose.yml
@@ -43,9 +43,10 @@ services:
         target: /data
 
   createbuckets:
-    image: minio/mc
+    image: minio/mc:RELEASE.2022-11-07T23-47-39Z
     depends_on:
-      - minio
+      minio:
+        condition: service_healthy
     environment:
       - NO_PROXY=minio,db,localhost
     networks:
@@ -54,7 +55,7 @@ services:
       /bin/sh -c "
       /usr/bin/mc alias set myminio http://minio:9000 ${AWS_ACCESS_KEY_ID} ${AWS_SECRET_ACCESS_KEY};
       /usr/bin/mc mb myminio/mlflow;
-      /usr/bin/mc policy set public myminio/mlflow;
+      /usr/bin/mc anonymous set public myminio/mlflow;
       exit 0;"
 
   db:
@@ -81,8 +82,10 @@ services:
     image: mlflow_server
     container_name: mlflow_server
     depends_on:
-      - "minio"
-      - "db"
+      db:
+        condition: service_started
+      createbuckets:
+        condition: service_completed_successfully
     networks:
       - frontend
       - backend
diff --git a/mlops/Experiment.py b/mlops/Experiment.py
index 86b5188..c74a3df 100644
--- a/mlops/Experiment.py
+++ b/mlops/Experiment.py
@@ -1,6 +1,8 @@
+import atexit
 import configparser
 import logging
 import os
+import shutil
 import subprocess
 import sys
 
@@ -21,7 +23,8 @@ class Experiment:
 
 
     def __init__(self, script, config_path, project_path: str = '.',
-                 verbose: bool = True, ignore_git_check: bool = False):
+                 verbose: bool = True, ignore_git_check: bool = False,
+                 include_path: str = None):
         """
         The Experiment class is the interface through which all projects should be run.
         :param script: path to script to run
@@ -37,6 +40,7 @@ def __init__(self, script, config_path, project_path: str = '.',
         self.project_path = project_path
         self.verbose = verbose
         self.auth = None
+        self.include_path = include_path
 
         if 'pytest' in sys.modules:
             logger.warning('DEBUG ONLY - ignoring git checks due to test run detected')
@@ -54,6 +58,7 @@ def __init__(self, script, config_path, project_path: str = '.',
         self.env_setup()
         self.build_project_file()
         self.init_experiment()
+        self.add_path()
 
         if self.verbose:
             self.print_experiment_info()
@@ -152,6 +157,26 @@ def init_experiment(self):
         # self.configure_minio()
         self.experiment_id = exp_id
 
+    def add_path(self):
+        """
+        Copies the folder at self.include_path to current context
+        :return:
+        """
+
+        if self.include_path:
+            folder_name = os.path.basename(os.path.abspath(self.include_path))
+            included_folder_dest = os.path.join(self.project_path, folder_name)
+
+            if not os.path.exists(self.include_path):
+                raise FileNotFoundError(f"Source folder does not exist: {self.include_path}")
+
+            if os.path.exists(included_folder_dest):
+                raise FileExistsError(f"Destination folder already exists: {included_folder_dest}. Please remove it first or rename your source folder.")
+
+            shutil.copytree(self.include_path, included_folder_dest)
+            logger.info(f'Copied {self.include_path} to {included_folder_dest}')
+            atexit.register(shutil.rmtree, included_folder_dest, ignore_errors=True)  # remove folder at exit
+
     def print_experiment_info(self):
         """
         Prints basic experiment info to logger
diff --git a/mlops/cli.py b/mlops/cli.py
index b280bd8..5d1e5b3 100644
--- a/mlops/cli.py
+++ b/mlops/cli.py
@@ -47,7 +47,8 @@ def cli(ctx):
               show_default=True, default=False)
 @click.option('--ignore_git_check', is_flag=True, show_default=True, default=False,
               help='TESTING ONLY - ignore git checks, occasionally it might be necessary to ignore the git checks for example, offline testing, do not use this feature if working on tracked models')
-def run(script, config_path, run_name, ignore_git_check, shared_memory, logging_level, rebuild_docker):
+@click.option('-i', '--include_path', 'include_path', help='Path to a folder to copy into the project directory before building the Docker image', default=None, type=click.Path(exists=True, file_okay=False))
+def run(script, config_path, run_name, ignore_git_check, shared_memory, logging_level, rebuild_docker, include_path):
     """
     Runs python project using csc-mlops framework.
 
@@ -57,14 +58,15 @@ def run(script, config_path, run_name, ignore_git_check, shared_memory, logging_
     # create Experiment
     exp = Experiment(script,
                      config_path=config_path,
-                     ignore_git_check=ignore_git_check
+                     ignore_git_check=ignore_git_check,
+                     include_path=include_path
                      )
 
     # run Experiment
     exp.run(docker_args={},
             run_name=run_name,
             rebuild_docker=rebuild_docker,
-            shared_memory=shared_memory,
+            shared_memory=shared_memory
             )
 
 
diff --git a/tests/data/requirements.txt b/tests/data/requirements.txt
index e8867c0..5ed8c83 100644
--- a/tests/data/requirements.txt
+++ b/tests/data/requirements.txt
@@ -1,6 +1,6 @@
 numpy
 -f https://download.pytorch.org/whl/torch_stable.html
 torch==2.0.1+cpu
-mlflow
+mlflow==2.10.0
 boto3
 requests==2.31.0