Sudivate/environment (#217)

sudivate · web-flow · commit c773db995958 · 2020-03-01T22:32:52.000-08:00
Reusable AML environment
diff --git a/.env.example b/.env.example
@@ -15,6 +15,7 @@ WORKSPACE_NAME = 'aml-workspace'
 EXPERIMENT_NAME = ''
 
 # AML Compute Cluster Config
+AML_ENV_NAME='diabetes_regression_training_env'
 AML_COMPUTE_CLUSTER_NAME = 'train-cluster'
 AML_COMPUTE_CLUSTER_CPU_SKU = 'STANDARD_DS2_V2'
 AML_CLUSTER_MAX_NODES = '4'
diff --git a/.pipelines/diabetes_regression-variables-template.yml b/.pipelines/diabetes_regression-variables-template.yml
@@ -29,6 +29,8 @@ variables:
     value: diabetes_regression_model.pkl
 
   # AML Compute Cluster Config
+  - name: AML_ENV_NAME
+    value: diabetes_regression_training_env
   - name: AML_COMPUTE_CLUSTER_CPU_SKU
     value: STANDARD_DS2_V2
   - name: AML_COMPUTE_CLUSTER_NAME
diff --git a/bootstrap/bootstrap.py b/bootstrap/bootstrap.py
@@ -103,7 +103,6 @@ def replaceprojectname(project_dir, project_name, rename_name):
             r"ml_service/pipelines/diabetes_regression_build_train_pipeline.py",  # NOQA: E501
             r"ml_service/pipelines/diabetes_regression_verify_train_pipeline.py",  # NOQA: E501
             r"ml_service/util/create_scoring_image.py",
-            r"diabetes_regression/azureml_environment.json",
             r"diabetes_regression/conda_dependencies.yml",
             r"diabetes_regression/evaluate/evaluate_model.py",
             r"diabetes_regression/register/register_model.py",
diff --git a/diabetes_regression/azureml_environment.json b/diabetes_regression/azureml_environment.json
diff --git a/diabetes_regression/conda_dependencies.yml b/diabetes_regression/conda_dependencies.yml
@@ -1,32 +1,40 @@
+# Conda environment specification. The dependencies defined in this file will
+# be automatically provisioned for managed runs. These include runs against
+# the localdocker, remotedocker, and cluster compute targets.
+
+# Note that this file is NOT used to automatically manage dependencies for the
+# local compute target. To provision these dependencies locally, run:
+# conda env update --file conda_dependencies.yml
+
 # Details about the Conda environment file format:
 # https://conda.io/docs/using/envs.html#create-environment-file-by-hand
 
-name: diabetes_regression_sklearn
+# For managing Spark packages and configuration, see spark_dependencies.yml.
+# Version of this configuration file's structure and semantics in AzureML.
+# This directive is stored in a comment to preserve the Conda file structure.
+# [AzureMlVersion] = 2
 
+name: diabetes_regression_training_env
 dependencies:
   # The python interpreter version.
+  # Currently Azure ML Workbench only supports 3.5.2 and later.
   - python=3.7.5
-
   # Required by azureml-defaults, installed separately through Conda to
   # get a prebuilt version and not require build tools for the install.
   - psutil=5.6 #latest
 
-  - numpy=1.18.1
-  - pandas=1.0.0
-  - scikit-learn=0.22.1
-  #- r-essentials
-  #- tensorflow
-  #- keras
-
-  - pip=20.0.2
   - pip:
-      # Dependencies for training environment.
-
-      - azureml-core==1.0.85
-
-      # Dependencies for scoring environment.
-
-      # You must list azureml-defaults as a pip dependency
-      - azureml-defaults==1.0.85
-      - inference-schema[numpy-support]==1.0.1
-      - azureml-dataprep==1.1.38
+      # Required packages for AzureML execution, history, and data preparation.
+      - azureml-model-management-sdk==1.0.1b6.post1
+      - azureml-sdk==1.0.74
+      - scipy==1.3.1
+      - scikit-learn==0.22
+      - pandas==0.25.3
+      - numpy==1.17.3
+      - joblib==0.14.0
+      - gunicorn==19.9.0
+      - flask==1.1.1
+      - inference-schema[numpy-support]
+      - azure
+      - azure-storage-blob
+      - azureml-dataprep
diff --git a/diabetes_regression/scoring/inference_config.yml b/diabetes_regression/scoring/inference_config.yml
@@ -6,4 +6,4 @@ schemaFile:
 sourceDirectory:
 enableGpu: False
 baseImage:
-baseImageRegistry:
+baseImageRegistry:
diff --git a/docs/code_description.md b/docs/code_description.md
@@ -62,7 +62,6 @@ The repository provides a template with folders structure suitable for maintaini
 
 ### Environment Definitions
 
-- `diabetes_regression/azureml_environment.json` : Azure ML environment definition for the training environment, including base Docker image and a reference to `conda_dependencies.yml` Conda environment file.
 - `diabetes_regression/conda_dependencies.yml` : Conda environment definition for the environment used for both training and scoring (Docker image in which train.py and score.py are run).
 - `diabetes_regression/ci_dependencies.yml` : Conda environment definition for the CI environment.
 
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline.py
@@ -1,11 +1,12 @@
 from azureml.pipeline.core.graph import PipelineParameter
 from azureml.pipeline.steps import PythonScriptStep
 from azureml.pipeline.core import Pipeline, PipelineData
-from azureml.core import Workspace, Environment
+from azureml.core import Workspace
 from azureml.core.runconfig import RunConfiguration
 from azureml.core import Dataset
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
+from ml_service.util.manage_environment import get_environment
 from sklearn.datasets import load_diabetes
 import pandas as pd
 import os
@@ -31,17 +32,16 @@ def main():
         print("aml_compute:")
         print(aml_compute)
 
-    # Create a reusable run configuration environment
-    # Read definition from diabetes_regression/azureml_environment.json
-    environment = Environment.load_from_directory(e.sources_directory_train)
-    if (e.collection_uri is not None and e.teamproject_name is not None):
-        builduri_base = e.collection_uri + e.teamproject_name
-        builduri_base = builduri_base + "/_build/results?buildId="
-        environment.environment_variables["BUILDURI_BASE"] = builduri_base
-    environment.register(aml_workspace)
+    # Create a reusable Azure ML environment
+    environment = get_environment(
+        aml_workspace, e.aml_env_name, create_new=False)  # NOQA: E501
 
     run_config = RunConfiguration()
     run_config.environment = environment
+    if (e.collection_uri is not None and e.teamproject_name is not None):
+        builduri_base = e.collection_uri + e.teamproject_name
+        builduri_base = builduri_base + "/_build/results?buildId="
+        run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base  # NOQA: E501
 
     model_name_param = PipelineParameter(
         name="model_name", default_value=e.model_name)
diff --git a/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py b/ml_service/pipelines/diabetes_regression_build_train_pipeline_with_r.py
@@ -1,9 +1,10 @@
 from azureml.pipeline.steps import PythonScriptStep
 from azureml.pipeline.core import Pipeline
-from azureml.core import Workspace, Environment
+from azureml.core import Workspace
 from azureml.core.runconfig import RunConfiguration
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
+from ml_service.util.manage_environment import get_environment
 
 
 def main():
@@ -26,19 +27,17 @@ def main():
         print("aml_compute:")
         print(aml_compute)
 
-    # Create a reusable run configuration environment
-    # Read definition from diabetes_regression/azureml_environment.json
+    # Create a reusable Azure ML environment
     # Make sure to include `r-essentials'
     #   in diabetes_regression/conda_dependencies.yml
-    environment = Environment.load_from_directory(e.sources_directory_train)
+    environment = get_environment(
+        aml_workspace, e.aml_env_name, create_new=False)  # NOQA: E501
+    run_config = RunConfiguration()
+    run_config.environment = environment
     if (e.collection_uri is not None and e.teamproject_name is not None):
         builduri_base = e.collection_uri + e.teamproject_name
         builduri_base = builduri_base + "/_build/results?buildId="
-        environment.environment_variables["BUILDURI_BASE"] = builduri_base
-    environment.register(aml_workspace)
-
-    run_config = RunConfiguration()
-    run_config.environment = environment
+        run_config.environment.environment_variables["BUILDURI_BASE"] = builduri_base  # NOQA: E501
 
     train_step = PythonScriptStep(
         name="Train Model",
diff --git a/ml_service/util/env_variables.py b/ml_service/util/env_variables.py
@@ -44,6 +44,7 @@ def __init__(self):
         self._run_evaluation = os.environ.get("RUN_EVALUATION", "true")
         self._allow_run_cancel = os.environ.get(
             "ALLOW_RUN_CANCEL", "true")
+        self._aml_env_name = os.environ.get("AML_ENV_NAME")
 
     @property
     def workspace_name(self):
@@ -156,3 +157,7 @@ def run_evaluation(self):
     @property
     def allow_run_cancel(self):
         return self._allow_run_cancel
+
+    @property
+    def aml_env_name(self):
+        return self._aml_env_name  # AML_ENV_NAME env var; None when unset
diff --git a/ml_service/util/manage_environment.py b/ml_service/util/manage_environment.py
@@ -0,0 +1,29 @@
+from azureml.core import Workspace, Environment
+from ml_service.util.env_variables import Env
+import os
+
+
+def get_environment(
+    workspace: Workspace,
+    environment_name: str,
+    create_new: bool = False
+):
+    try:
+        e = Env()
+        environments = Environment.list(workspace=workspace)
+        restored_environment = None
+        for env in environments:
+            if env == environment_name:
+                restored_environment = environments[environment_name]
+
+        if restored_environment is None or create_new:
+            new_env = Environment.from_conda_specification(environment_name, os.path.join(e.sources_directory_train, "conda_dependencies.yml"))  # NOQA: E501
+            restored_environment = new_env
+            restored_environment.register(workspace)
+
+        if restored_environment is not None:
+            print(restored_environment)
+        return restored_environment
+    except Exception as e:
+        print(e)
+        exit(1)