Skip to content

Commit c773db9

Browse files
authored
Sudivate/environment (#217)
reusable aml environment
1 parent 4529c18 commit c773db9

11 files changed

+83
-80
lines changed

.env.example

+1
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@ WORKSPACE_NAME = 'aml-workspace'
1515
EXPERIMENT_NAME = ''
1616

1717
# AML Compute Cluster Config
18+
AML_ENV_NAME='diabetes_regression_training_env'
1819
AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
1920
AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
2021
AML_CLUSTER_MAX_NODES = '4'

.pipelines/diabetes_regression-variables-template.yml

+2
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,8 @@ variables:
2929
value: diabetes_regression_model.pkl
3030

3131
# AML Compute Cluster Config
32+
- name: AML_ENV_NAME
33+
value: diabetes_regression_training_env
3234
- name: AML_COMPUTE_CLUSTER_CPU_SKU
3335
value: STANDARD_DS2_V2
3436
- name: AML_COMPUTE_CLUSTER_NAME

bootstrap/bootstrap.py

-1
Original file line numberDiff line numberDiff line change
@@ -103,7 +103,6 @@ def replaceprojectname(project_dir, project_name, rename_name):
103103
r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py", # NOQA: E501
104104
r"ml_service/pipelines/diabetes_regression_verify_train_pipeline.py", # NOQA: E501
105105
r"ml_service/util/create_scoring_image.py",
106-
r"diabetes_regression/azureml_environment.json",
107106
r"diabetes_regression/conda_dependencies.yml",
108107
r"diabetes_regression/evaluate/evaluate_model.py",
109108
r"diabetes_regression/register/register_model.py",

diabetes_regression/azureml_environment.json

-39
This file was deleted.
+28-20
Original file line numberDiff line numberDiff line change
@@ -1,32 +1,40 @@
1+
# Conda environment specification. The dependencies defined in this file will
2+
# be automatically provisioned for managed runs. These include runs against
3+
# the localdocker, remotedocker, and cluster compute targets.
4+
5+
# Note that this file is NOT used to automatically manage dependencies for the
6+
# local compute target. To provision these dependencies locally, run:
7+
# conda env update --file conda_dependencies.yml
8+
19
# Details about the Conda environment file format:
210
# https://conda.io/docs/using/envs.html#create-environment-file-by-hand
311

4-
name: diabetes_regression_sklearn
12+
# For managing Spark packages and configuration, see spark_dependencies.yml.
13+
# Version of this configuration file's structure and semantics in AzureML.
14+
# This directive is stored in a comment to preserve the Conda file structure.
15+
# [AzureMlVersion] = 2
516

17+
name: diabetes_regression_training_env
618
dependencies:
719
# The python interpreter version.
20+
# Currently Azure ML Workbench only supports 3.5.2 and later.
821
- python=3.7.5
9-
1022
# Required by azureml-defaults, installed separately through Conda to
1123
# get a prebuilt version and not require build tools for the install.
1224
- psutil=5.6 #latest
1325

14-
- numpy=1.18.1
15-
- pandas=1.0.0
16-
- scikit-learn=0.22.1
17-
#- r-essentials
18-
#- tensorflow
19-
#- keras
20-
21-
- pip=20.0.2
2226
- pip:
23-
# Dependencies for training environment.
24-
25-
- azureml-core==1.0.85
26-
27-
# Dependencies for scoring environment.
28-
29-
# You must list azureml-defaults as a pip dependency
30-
- azureml-defaults==1.0.85
31-
- inference-schema[numpy-support]==1.0.1
32-
- azureml-dataprep==1.1.38
27+
# Required packages for AzureML execution, history, and data preparation.
28+
- azureml-model-management-sdk==1.0.1b6.post1
29+
- azureml-sdk==1.0.74
30+
- scipy==1.3.1
31+
- scikit-learn==0.22
32+
- pandas==0.25.3
33+
- numpy==1.17.3
34+
- joblib==0.14.0
35+
- gunicorn==19.9.0
36+
- flask==1.1.1
37+
- inference-schema[numpy-support]
38+
- azure
39+
- azure-storage-blob
40+
- azureml-dataprep

diabetes_regression/scoring/inference_config.yml

+1-1
Original file line numberDiff line numberDiff line change
@@ -6,4 +6,4 @@ schemaFile:
66
sourceDirectory:
77
enableGpu: False
88
baseImage:
9-
baseImageRegistry:
9+
baseImageRegistry:

docs/code_description.md

-1
Original file line numberDiff line numberDiff line change
@@ -62,7 +62,6 @@ The repository provides a template with folders structure suitable for maintaini
6262

6363
### Environment Definitions
6464

65-
- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including base Docker image and a reference to `conda_dependencies.yml` Conda environment file.
6665
- `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run).
6766
- `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment.
6867

ml_service/pipelines/diabetes_regression_build_train_pipeline.py

+9-9
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,12 @@
11
from azureml.pipeline.core.graph import PipelineParameter
22
from azureml.pipeline.steps import PythonScriptStep
33
from azureml.pipeline.core import Pipeline, PipelineData
4-
from azureml.core import Workspace, Environment
4+
from azureml.core import Workspace
55
from azureml.core.runconfig import RunConfiguration
66
from azureml.core import Dataset
77
from ml_service.util.attach_compute import get_compute
88
from ml_service.util.env_variables import Env
9+
from ml_service.util.manage_environment import get_environment
910
from sklearn.datasets import load_diabetes
1011
import pandas as pd
1112
import os
@@ -31,17 +32,16 @@ def main():
3132
print("aml_compute:")
3233
print(aml_compute)
3334

34-
# Create a reusable run configuration environment
35-
# Read definition from diabetes_regression/azureml_environment.json
36-
environment = Environment.load_from_directory(e.sources_directory_train)
37-
if (e.collection_uri is not None and e.teamproject_name is not None):
38-
builduri_base = e.collection_uri + e.teamproject_name
39-
builduri_base = builduri_base + "/_build/results?buildId="
40-
environment.environment_variables["BUILDURI_BASE"] = builduri_base
41-
environment.register(aml_workspace)
35+
# Create a reusable Azure ML environment
36+
environment = get_environment(
37+
aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501
4238

4339
run_config = RunConfiguration()
4440
run_config.environment = environment
41+
if (e.collection_uri is not None and e.teamproject_name is not None):
42+
builduri_base = e.collection_uri + e.teamproject_name
43+
builduri_base = builduri_base + "/_build/results?buildId="
44+
run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501
4545

4646
model_name_param = PipelineParameter(
4747
name="model_name", default_value=e.model_name)

ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py

+8-9
Original file line numberDiff line numberDiff line change
@@ -1,9 +1,10 @@
11
from azureml.pipeline.steps import PythonScriptStep
22
from azureml.pipeline.core import Pipeline
3-
from azureml.core import Workspace, Environment
3+
from azureml.core import Workspace
44
from azureml.core.runconfig import RunConfiguration
55
from ml_service.util.attach_compute import get_compute
66
from ml_service.util.env_variables import Env
7+
from ml_service.util.manage_environment import get_environment
78

89

910
def main():
@@ -26,19 +27,17 @@ def main():
2627
print("aml_compute:")
2728
print(aml_compute)
2829

29-
# Create a reusable run configuration environment
30-
# Read definition from diabetes_regression/azureml_environment.json
30+
# Create a reusable Azure ML environment
3131
# Make sure to include `r-essentials`
3232
# in diabetes_regression/conda_dependencies.yml
33-
environment = Environment.load_from_directory(e.sources_directory_train)
33+
environment = get_environment(
34+
aml_workspace, e.aml_env_name, create_new=False) # NOQA: E501
35+
run_config = RunConfiguration()
36+
run_config.environment = environment
3437
if (e.collection_uri is not None and e.teamproject_name is not None):
3538
builduri_base = e.collection_uri + e.teamproject_name
3639
builduri_base = builduri_base + "/_build/results?buildId="
37-
environment.environment_variables["BUILDURI_BASE"] = builduri_base
38-
environment.register(aml_workspace)
39-
40-
run_config = RunConfiguration()
41-
run_config.environment = environment
40+
run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base # NOQA: E501
4241

4342
train_step = PythonScriptStep(
4443
name="Train Model",

ml_service/util/env_variables.py

+5
Original file line numberDiff line numberDiff line change
@@ -44,6 +44,7 @@ def __init__(self):
4444
self._run_evaluation = os.environ.get("RUN_EVALUATION", "true")
4545
self._allow_run_cancel = os.environ.get(
4646
"ALLOW_RUN_CANCEL", "true")
47+
self._aml_env_name = os.environ.get("AML_ENV_NAME")
4748

4849
@property
4950
def workspace_name(self):
@@ -156,3 +157,7 @@ def run_evaluation(self):
156157
@property
157158
def allow_run_cancel(self):
158159
return self._allow_run_cancel
160+
161+
@property
162+
def aml_env_name(self):
163+
return self._aml_env_name

ml_service/util/manage_environment.py

+29
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
from azureml.core import Workspace, Environment
2+
from ml_service.util.env_variables import Env
3+
import os
4+
5+
6+
def get_environment(
    workspace: Workspace,
    environment_name: str,
    create_new: bool = False
):
    """Return the Azure ML environment registered under ``environment_name``.

    The environments registered in ``workspace`` are looked up by name. When
    no environment with that name exists, or when ``create_new`` is true, a
    new environment is built from ``conda_dependencies.yml`` located in the
    training sources directory (``Env().sources_directory_train``) and
    registered in the workspace.

    Args:
        workspace: Azure ML workspace to search and to register into.
        environment_name: Name of the environment to restore or create.
        create_new: When true, rebuild and re-register the environment from
            the Conda specification even if one is already registered.

    Returns:
        The restored or newly created ``Environment`` object.

    Raises:
        SystemExit: With exit code 1 when any step fails; the underlying
            error is printed before exiting.
    """
    # Imported locally so the block does not depend on a module-level import.
    import sys

    try:
        settings = Env()
        # Environment.list returns a mapping of environment name to
        # Environment, so a direct lookup replaces scanning every key.
        registered = Environment.list(workspace=workspace)
        restored_environment = registered.get(environment_name)

        if restored_environment is None or create_new:
            conda_file = os.path.join(
                settings.sources_directory_train, "conda_dependencies.yml")
            restored_environment = Environment.from_conda_specification(
                environment_name, conda_file)
            restored_environment.register(workspace)

        # At this point an environment has always been restored or created.
        print(restored_environment)
        return restored_environment
    except Exception as error:
        # Keep the original contract: report the failure and stop the
        # process with a non-zero exit code. sys.exit is used instead of the
        # site-provided exit() helper, which is not guaranteed to exist.
        print(error)
        sys.exit(1)

0 commit comments

Comments
 (0)