@@ -1,9 +1,8 @@
 from azureml.pipeline.core.graph import PipelineParameter
 from azureml.pipeline.steps import PythonScriptStep
 from azureml.pipeline.core import Pipeline, PipelineData
-from azureml.core import Workspace
+from azureml.core import Workspace, Dataset, Datastore
 from azureml.core.runconfig import RunConfiguration
-from azureml.core import Dataset
 from ml_service.util.attach_compute import get_compute
 from ml_service.util.env_variables import Env
 from ml_service.util.manage_environment import get_environment
@@ -39,8 +38,20 @@ def main():
     run_config = RunConfiguration()
     run_config.environment = environment

+    if (e.datastore_name):
+        datastore_name = e.datastore_name
+    else:
+        datastore_name = aml_workspace.get_default_datastore().name
+    run_config.environment.environment_variables["DATASTORE_NAME"] = datastore_name  # NOQA: E501
+
     model_name_param = PipelineParameter(
         name="model_name", default_value=e.model_name)
+    dataset_version_param = PipelineParameter(
+        name="dataset_version", default_value=e.dataset_version)
+    data_file_path_param = PipelineParameter(
+        name="data_file_path", default_value="none")
+    caller_run_id_param = PipelineParameter(
+        name="caller_run_id", default_value="none")

     # Get dataset name
     dataset_name = e.dataset_name
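The new PipelineParameters only matter when a caller overrides them at submission time; otherwise the defaults above apply. A minimal sketch of such an override, assuming the pipeline has already been published; the pipeline id, experiment name, and parameter values are placeholders, not part of this change:

# Illustrative only: submit the published pipeline with explicit values for
# the PipelineParameters defined above.
from azureml.core import Experiment, Workspace
from azureml.pipeline.core import PublishedPipeline

ws = Workspace.from_config()                      # assumes a local config.json
published = PublishedPipeline.get(ws, id="<published-pipeline-id>")  # placeholder
experiment = Experiment(ws, "diabetes-train")     # placeholder experiment name
run = experiment.submit(
    published,
    pipeline_parameters={
        "model_name": "diabetes_model.pkl",
        "dataset_version": "latest",
        "data_file_path": "none",
        "caller_run_id": "none",
    },
)
run.wait_for_completion(show_output=True)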
@@ -57,9 +68,9 @@ def main():
         df.to_csv(file_name, index=False)

         # Upload file to default datastore in workspace
-        default_ds = aml_workspace.get_default_datastore()
+        datastore = Datastore.get(aml_workspace, datastore_name)
         target_path = 'training-data/'
-        default_ds.upload_files(
+        datastore.upload_files(
             files=[file_name],
             target_path=target_path,
             overwrite=True,
@@ -68,17 +79,14 @@ def main():
         # Register dataset
         path_on_datastore = os.path.join(target_path, file_name)
         dataset = Dataset.Tabular.from_delimited_files(
-            path=(default_ds, path_on_datastore))
+            path=(datastore, path_on_datastore))
         dataset = dataset.register(
             workspace=aml_workspace,
             name=dataset_name,
             description='diabetes training data',
             tags={'format': 'CSV'},
             create_new_version=True)

-    # Get the dataset
-    dataset = Dataset.get_by_name(aml_workspace, dataset_name)
-
     # Create a PipelineData to pass data between steps
     pipeline_data = PipelineData(
         'pipeline_data',
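With the eager Dataset.get_by_name call removed, the registered dataset is resolved later (for example inside the training step), where a specific version can be requested. A minimal sketch of that lookup, with placeholder workspace and dataset names:

# Illustrative only: fetch a specific registered version of the tabular
# dataset rather than binding it to the pipeline at build time.
from azureml.core import Dataset, Workspace

ws = Workspace.from_config()                              # assumes a local config.json
dataset = Dataset.get_by_name(ws, name="diabetes_ds", version="latest")  # placeholder name
df = dataset.to_pandas_dataframe()                        # materialize for inspection
print(df.shape)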
@@ -89,11 +97,14 @@ def main():
         script_name=e.train_script_path,
         compute_target=aml_compute,
         source_directory=e.sources_directory_train,
-        inputs=[dataset.as_named_input('training_data')],
         outputs=[pipeline_data],
         arguments=[
             "--model_name", model_name_param,
-            "--step_output", pipeline_data
+            "--step_output", pipeline_data,
+            "--dataset_version", dataset_version_param,
+            "--data_file_path", data_file_path_param,
+            "--caller_run_id", caller_run_id_param,
+            "--dataset_name", dataset_name,
         ],
         runconfig=run_config,
         allow_reuse=False,
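Because the dataset is no longer attached as a step input, the training script must resolve it from the arguments passed above and from the DATASTORE_NAME environment variable set on the run configuration. A sketch of that script-side logic, assuming argparse; this mirrors the argument names used above but is not the repository's actual train script:

# Illustrative sketch of the training-script side of this change.
import argparse
import os

from azureml.core import Dataset, Datastore, Run

parser = argparse.ArgumentParser()
parser.add_argument("--model_name", type=str)
parser.add_argument("--step_output", type=str)
parser.add_argument("--dataset_version", type=str)
parser.add_argument("--data_file_path", type=str)
parser.add_argument("--caller_run_id", type=str)
parser.add_argument("--dataset_name", type=str)
args = parser.parse_args()

run = Run.get_context()
workspace = run.experiment.workspace

if args.data_file_path == "none":
    # Resolve the registered dataset at the requested version.
    dataset = Dataset.get_by_name(
        workspace, name=args.dataset_name, version=args.dataset_version)
else:
    # Build an ad-hoc tabular dataset from a file on the configured datastore.
    datastore = Datastore.get(workspace, os.environ.get("DATASTORE_NAME"))
    dataset = Dataset.Tabular.from_delimited_files(
        path=(datastore, args.data_file_path))

df = dataset.to_pandas_dataframe()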