1
1
{
2
2
"cells" : [
3
3
{
4
- "cell_type" : " code" ,
5
- "execution_count" : null ,
6
- "metadata" : {},
7
- "outputs" : [],
8
- "source" : [
9
- " # nuclio: ignore\n " ,
10
- " import nuclio"
11
- ]
12
- },
13
- {
14
- "cell_type" : " code" ,
15
- "execution_count" : null ,
16
- "metadata" : {},
17
- "outputs" : [],
18
- "source" : [
19
- " %nuclio config kind = \" job\"\n " ,
20
- " %nuclio config spec.image = \" mlrun/ml-models\" "
21
- ]
22
- },
23
- {
24
- "cell_type" : " code" ,
25
- "execution_count" : null ,
26
- "metadata" : {},
27
- "outputs" : [],
28
- "source" : [
29
- " # Copyright 2018 Iguazio\n " ,
30
- " #\n " ,
31
- " # Licensed under the Apache License, Version 2.0 (the \" License\" );\n " ,
32
- " # you may not use this file except in compliance with the License.\n " ,
33
- " # You may obtain a copy of the License at\n " ,
34
- " #\n " ,
35
- " # http://www.apache.org/licenses/LICENSE-2.0\n " ,
36
- " #\n " ,
37
- " # Unless required by applicable law or agreed to in writing, software\n " ,
38
- " # distributed under the License is distributed on an \" AS IS\" BASIS,\n " ,
39
- " # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.\n " ,
40
- " # See the License for the specific language governing permissions and\n " ,
41
- " # limitations under the License.\n " ,
42
- " \n " ,
43
- " import numpy as np\n " ,
44
- " import pandas as pd\n " ,
45
- " from mlrun.execution import MLClientCtx\n " ,
46
- " \n " ,
47
- " \n " ,
48
- " def load_dataset(\n " ,
49
- " context: MLClientCtx,\n " ,
50
- " dataset: str,\n " ,
51
- " name: str = '',\n " ,
52
- " file_ext: str = 'parquet',\n " ,
53
- " params: dict = {}\n " ,
54
- " ) -> None:\n " ,
55
- " \"\"\" Loads a scikit-learn toy dataset for classification or regression\n " ,
56
- " \n " ,
57
- " The following datasets are available ('name' : description):\n " ,
58
- " \n " ,
59
- " 'boston' : boston house-prices dataset (regression)\n " ,
60
- " 'iris' : iris dataset (classification)\n " ,
61
- " 'diabetes' : diabetes dataset (regression)\n " ,
62
- " 'digits' : digits dataset (classification)\n " ,
63
- " 'linnerud' : linnerud dataset (multivariate regression)\n " ,
64
- " 'wine' : wine dataset (classification)\n " ,
65
- " 'breast_cancer' : breast cancer wisconsin dataset (classification)\n " ,
66
- " \n " ,
67
- " The scikit-learn functions return a data bunch including the following items:\n " ,
68
- " - data the features matrix\n " ,
69
- " - target the ground truth labels\n " ,
70
- " - DESCR a description of the dataset\n " ,
71
- " - feature_names header for data\n " ,
72
- " \n " ,
73
- " The features (and their names) are stored with the target labels in a DataFrame.\n " ,
74
- " \n " ,
75
- " For further details see https://scikit-learn.org/stable/datasets/index.html#toy-datasets\n " ,
76
- " \n " ,
77
- " :param context: function execution context\n " ,
78
- " :param dataset: name of the dataset to load\n " ,
79
- " :param name: artifact name (defaults to dataset)\n " ,
80
- " :param file_ext: output file_ext: parquet or csv\n " ,
81
- " :param params: params of the sklearn load_data method\n " ,
82
- " \"\"\"\n " ,
83
- " dataset = str(dataset)\n " ,
84
- " # reach into module and import the appropriate load_xxx function\n " ,
85
- " pkg_module = 'sklearn.datasets'\n " ,
86
- " fname = f'load_{dataset}'\n " ,
87
- " \n " ,
88
- " pkg_module = __import__(pkg_module, fromlist=[fname])\n " ,
89
- " load_data_fn = getattr(pkg_module, fname)\n " ,
90
- " \n " ,
91
- " data = load_data_fn(**params)\n " ,
92
- " feature_names = data['feature_names']\n " ,
93
- " \n " ,
94
- " # create the toy dataset\n " ,
95
- " xy = np.concatenate([data['data'], data['target'].reshape(-1, 1)], axis=1)\n " ,
96
- " if hasattr(feature_names, 'append'):\n " ,
97
- " # it's a list\n " ,
98
- " feature_names.append('labels')\n " ,
99
- " else:\n " ,
100
- " # it's an array\n " ,
101
- " feature_names = np.append(feature_names, 'labels')\n " ,
102
- " df = pd.DataFrame(data=xy, columns=feature_names)\n " ,
103
- " \n " ,
104
- " # log and upload the dataset\n " ,
105
- " context.log_dataset(name or dataset, df=df, format=file_ext, index=False)"
106
- ]
107
- },
108
- {
109
- "cell_type" : " code" ,
110
- "execution_count" : null ,
111
- "metadata" : {},
112
- "outputs" : [],
4
+ "cell_type" : " markdown" ,
113
5
"source" : [
114
- " # nuclio: end-code"
115
- ]
6
+ " # Load Dataset"
7
+ ],
8
+ "metadata" : {
9
+ "collapsed" : false
10
+ }
116
11
},
117
12
{
118
13
"cell_type" : " markdown" ,
119
14
"metadata" : {},
120
15
"source" : [
121
- " ### mlconfig "
16
+ " ## Configuration "
122
17
]
123
18
},
124
19
{
127
22
"metadata" : {},
128
23
"outputs" : [],
129
24
"source" : [
130
- " from mlrun import mlconf\n " ,
131
25
" import os\n " ,
132
26
" \n " ,
27
+ " from mlrun import mlconf\n " ,
28
+ " \n " ,
133
29
" mlconf.dbpath = mlconf.dbpath or 'http://mlrun-api:8080'\n " ,
134
- " mlconf.artifact_path = mlconf.artifact_path or f'{os.environ[\" HOME\" ]}/artifacts'"
30
+ " mlconf.artifact_path = mlconf.artifact_path or f'{os.environ[\" HOME\" ]}/artifacts'\n "
135
31
]
136
32
},
137
33
{
138
34
"cell_type" : " markdown" ,
139
- "metadata" : {},
140
35
"source" : [
141
- " ### save"
142
- ]
36
+ " ## Run Locally"
37
+ ],
38
+ "metadata" : {
39
+ "collapsed" : false
40
+ }
143
41
},
144
42
{
145
43
"cell_type" : " code" ,
146
44
"execution_count" : null ,
147
- "metadata" : {},
148
45
"outputs" : [],
149
46
"source" : [
150
- " from mlrun import code_to_function \n " ,
151
- " # create job function object from notebook code\n " ,
152
- " fn = code_to_function(\" load_dataset\" )\n " ,
47
+ " from mlrun import run_local\n " ,
48
+ " from load_dataset import load_dataset\n " ,
153
49
" \n " ,
154
- " # add metadata (for templates and reuse)\n " ,
155
- " fn.spec.default_handler = \" load_dataset\"\n " ,
156
- " fn.spec.description = \" load a toy dataset from scikit-learn\"\n " ,
157
- " fn.metadata.categories = [\" data-source\" , \" ml\" ]\n " ,
158
- " fn.metadata.labels = {\" author\" : \" yjb\" , \" framework\" : \" sklearn\" }\n " ,
159
- " fn.export(\" function.yaml\" )"
160
- ]
50
+ " for dataset in [\" wine\" , \" iris\" , \" breast_cancer\" ]:\n " ,
51
+ " run_local(\n " ,
52
+ " handler=load_dataset,\n " ,
53
+ " inputs={\" dataset\" : dataset},\n " ,
54
+ " artifact_path=mlconf.artifact_path\n " ,
55
+ " )"
56
+ ],
57
+ "metadata" : {
58
+ "collapsed" : false ,
59
+ "pycharm" : {
60
+ "name" : " #%%\n "
61
+ }
62
+ }
161
63
},
162
64
{
163
65
"cell_type" : " markdown" ,
164
66
"metadata" : {},
165
67
"source" : [
166
- " ## tests "
68
+ " ## Run remotely \n "
167
69
]
168
70
},
169
71
{
170
72
"cell_type" : " code" ,
171
73
"execution_count" : null ,
172
- "metadata" : {},
173
74
"outputs" : [],
174
75
"source" : [
175
- " # load function from marketplace\n " ,
176
76
" from mlrun import import_function\n " ,
77
+ " from mlrun import NewTask\n " ,
177
78
" \n " ,
178
- " # vcs_branch = 'development'\n " ,
179
- " # base_vcs = f'https://raw.githubusercontent.com/mlrun/functions/{vcs_branch}/'\n " ,
180
- " # mlconf.hub_url = mlconf.hub_url or base_vcs + f'{name}/function.yaml'\n " ,
181
- " # fn = import_function(\" hub://load_dataset\" )"
182
- ]
183
- },
184
- {
185
- "cell_type" : " code" ,
186
- "execution_count" : null ,
187
- "metadata" : {},
188
- "outputs" : [],
189
- "source" : [
190
- " if \" V3IO_HOME\" in list(os.environ):\n " ,
79
+ " fn = import_function(\" hub://load_dataset\" )\n " ,
80
+ " \n " ,
81
+ " if \" V3IO_HOME\" in os.environ:\n " ,
191
82
" from mlrun import mount_v3io\n " ,
192
83
" fn.apply(mount_v3io())\n " ,
193
84
" else:\n " ,
194
85
" # if you set up mlrun using the instructions at https://github.com/mlrun/mlrun/blob/master/hack/local/README.md\n " ,
195
86
" from mlrun.platforms import mount_pvc\n " ,
196
- " fn.apply(mount_pvc('nfsvol', 'nfsvol', '/home/joyan/data'))"
197
- ]
198
- },
199
- {
200
- "cell_type" : " code" ,
201
- "execution_count" : null ,
202
- "metadata" : {},
203
- "outputs" : [],
204
- "source" : [
205
- " from mlrun import NewTask \n " ,
87
+ " fn.apply(mount_pvc('nfsvol', 'nfsvol', '/home/joyan/data'))\n " ,
88
+ " \n " ,
89
+ " task_params = {\" name\" : \" tasks load toy dataset\" , \" params\" : {\" dataset\" : \" wine\" }}\n " ,
206
90
" \n " ,
207
- " task_params = {\n " ,
208
- " \" name\" : \" tasks load toy dataset\" , \n " ,
209
- " \" params\" : {\" dataset\" : \" wine\" }}"
210
- ]
211
- },
212
- {
213
- "cell_type" : " markdown" ,
214
- "metadata" : {},
215
- "source" : [
216
- " ### run remotely"
217
- ]
218
- },
219
- {
220
- "cell_type" : " code" ,
221
- "execution_count" : null ,
222
- "metadata" : {},
223
- "outputs" : [],
224
- "source" : [
225
91
" run = fn.run(NewTask(**task_params), artifact_path=mlconf.artifact_path)"
226
- ]
227
- },
228
- {
229
- "cell_type" : " markdown" ,
230
- "metadata" : {},
231
- "source" : [
232
- " ### or locally"
233
- ]
234
- },
235
- {
236
- "cell_type" : " code" ,
237
- "execution_count" : null ,
238
- "metadata" : {},
239
- "outputs" : [],
240
- "source" : [
241
- " from mlrun import run_local"
242
- ]
243
- },
244
- {
245
- "cell_type" : " code" ,
246
- "execution_count" : null ,
247
- "metadata" : {},
248
- "outputs" : [],
249
- "source" : [
250
- " for dataset in [\" wine\" , \" iris\" , \" breast_cancer\" ]:\n " ,
251
- " run_local(handler=load_dataset,\n " ,
252
- " inputs={\" dataset\" : dataset}, artifact_path=mlconf.artifact_path)"
253
- ]
254
- },
255
- {
256
- "cell_type" : " code" ,
257
- "execution_count" : null ,
258
- "metadata" : {},
259
- "outputs" : [],
260
- "source" : []
92
+ ],
93
+ "metadata" : {
94
+ "collapsed" : false ,
95
+ "pycharm" : {
96
+ "name" : " #%%\n "
97
+ }
98
+ }
261
99
}
262
100
],
263
101
"metadata" : {
281
119
},
282
120
"nbformat" : 4 ,
283
121
"nbformat_minor" : 4
284
- }
122
+ }
0 commit comments